{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nes_py.wrappers import JoypadSpace\n",
    "import gym_super_mario_bros\n",
    "from gym_super_mario_bros.actions import SIMPLE_MOVEMENT\n",
    "import time\n",
    "from matplotlib import pyplot as plt\n",
    "from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack\n",
    "from stable_baselines3 import PPO\n",
    "\n",
    "from gym.wrappers import GrayScaleObservation\n",
    "\n",
    "from stable_baselines3.common.monitor import Monitor\n",
    "from stable_baselines3.common.results_plotter import load_results, ts2xy\n",
    "import numpy as np\n",
    "import os\n",
    "from stable_baselines3.common.callbacks import BaseCallback\n",
    "\n",
    "# Build the training environment in separately named stages. Each wrapper gets\n",
    "# its own name, so the factory handed to DummyVecEnv closes over a variable that\n",
    "# is never rebound afterwards (Python closures look names up when they are\n",
    "# called, not when they are defined).\n",
    "raw_env = gym_super_mario_bros.make('SuperMarioBros-v0')\n",
    "# Restrict the controller to the SIMPLE_MOVEMENT action set.\n",
    "joypad_env = JoypadSpace(raw_env, SIMPLE_MOVEMENT)\n",
    "\n",
    "log_dir = './monitor_log/'\n",
    "os.makedirs(log_dir, exist_ok=True)\n",
    "\n",
    "# Monitor wraps the un-preprocessed env and is given log_dir as its log location.\n",
    "monitored_env = Monitor(joypad_env, log_dir)\n",
    "\n",
    "# keep_dim=True is passed so the grayscale frames keep a channel axis\n",
    "# (per the gym GrayScaleObservation documentation).\n",
    "gray_env = GrayScaleObservation(monitored_env, keep_dim=True)\n",
    "\n",
    "# `env` is the final object: a single-env DummyVecEnv with 4 stacked frames.\n",
    "vec_env = DummyVecEnv([lambda: gray_env])\n",
    "env = VecFrameStack(vec_env, 4, channels_order='last')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SaveOnStepCallback(BaseCallback):\n",
    "    \"\"\"\n",
    "    Callback that saves a snapshot of the model every ``check_freq`` calls.\n",
    "\n",
    "    Snapshots are written unconditionally (no reward comparison is made) to\n",
    "    ``<save_path>/best_model/_<n_calls>``.\n",
    "\n",
    "    :param check_freq: (int) A snapshot is saved whenever the callback's call\n",
    "      counter ``n_calls`` is a multiple of this value. Must be non-zero.\n",
    "    :param save_path: (str) Parent folder; snapshots are stored in its\n",
    "      ``best_model`` sub-folder, which is created if it does not exist.\n",
    "    :param verbose: (int) Verbosity level forwarded to ``BaseCallback``.\n",
    "    \"\"\"\n",
    "    def __init__(self, check_freq: int, save_path: str, verbose: int = 1) -> None:\n",
    "        super().__init__(verbose)\n",
    "        self.check_freq = check_freq\n",
    "        # NOTE(review): despite the folder name, every periodic snapshot is stored\n",
    "        # here, not only the best one; the name is left unchanged so existing\n",
    "        # paths keep working.\n",
    "        self.save_path = os.path.join(save_path, 'best_model')\n",
    "\n",
    "    def _init_callback(self) -> None:\n",
    "        # save_path is always a string here (built with os.path.join in __init__),\n",
    "        # so the folder can be created unconditionally.\n",
    "        os.makedirs(self.save_path, exist_ok=True)\n",
    "\n",
    "    def _on_step(self) -> bool:\n",
    "        if self.n_calls % self.check_freq == 0:\n",
    "            snapshot_path = os.path.join(self.save_path, '_{}'.format(self.n_calls))\n",
    "            self.model.save(snapshot_path)\n",
    "\n",
    "        # Always True: per the stable-baselines3 callback contract, returning\n",
    "        # False would abort training.\n",
    "        return True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using cuda device\n",
      "Wrapping the env in a VecTransposeImage.\n",
      "Logging to ./tensorboard_logs/PPO_1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\software\\e_anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------\n",
      "| time/              |      |\n",
      "|    fps             | 93   |\n",
      "|    iterations      | 1    |\n",
      "|    time_elapsed    | 21   |\n",
      "|    total_timesteps | 2048 |\n",
      "-----------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 72            |\n",
      "|    iterations           | 2             |\n",
      "|    time_elapsed         | 56            |\n",
      "|    total_timesteps      | 4096          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00010179842 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | -0.00362      |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 79.4          |\n",
      "|    n_updates            | 10            |\n",
      "|    policy_gradient_loss | -0.00037      |\n",
      "|    value_loss           | 144           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 66            |\n",
      "|    iterations           | 3             |\n",
      "|    time_elapsed         | 92            |\n",
      "|    total_timesteps      | 6144          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00023726805 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | 0.0672        |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.137         |\n",
      "|    n_updates            | 20            |\n",
      "|    policy_gradient_loss | -0.00075      |\n",
      "|    value_loss           | 0.345         |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 62            |\n",
      "|    iterations           | 4             |\n",
      "|    time_elapsed         | 131           |\n",
      "|    total_timesteps      | 8192          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00023196003 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | -0.00771      |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.0955        |\n",
      "|    n_updates            | 30            |\n",
      "|    policy_gradient_loss | -0.000961     |\n",
      "|    value_loss           | 0.316         |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 60            |\n",
      "|    iterations           | 5             |\n",
      "|    time_elapsed         | 168           |\n",
      "|    total_timesteps      | 10240         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00014397106 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.94         |\n",
      "|    explained_variance   | 0.0149        |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.328         |\n",
      "|    n_updates            | 40            |\n",
      "|    policy_gradient_loss | -0.000695     |\n",
      "|    value_loss           | 0.61          |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 59            |\n",
      "|    iterations           | 6             |\n",
      "|    time_elapsed         | 206           |\n",
      "|    total_timesteps      | 12288         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 4.8878865e-05 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.94         |\n",
      "|    explained_variance   | 0.082         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 32.8          |\n",
      "|    n_updates            | 50            |\n",
      "|    policy_gradient_loss | -0.000107     |\n",
      "|    value_loss           | 96.4          |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 58            |\n",
      "|    iterations           | 7             |\n",
      "|    time_elapsed         | 243           |\n",
      "|    total_timesteps      | 14336         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 4.7017937e-05 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.94         |\n",
      "|    explained_variance   | 0.251         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.196         |\n",
      "|    n_updates            | 60            |\n",
      "|    policy_gradient_loss | -0.000116     |\n",
      "|    value_loss           | 0.422         |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.62e+04     |\n",
      "|    ep_rew_mean          | 510          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 58           |\n",
      "|    iterations           | 8            |\n",
      "|    time_elapsed         | 281          |\n",
      "|    total_timesteps      | 16384        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0003870851 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.94        |\n",
      "|    explained_variance   | -0.0111      |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0972       |\n",
      "|    n_updates            | 70           |\n",
      "|    policy_gradient_loss | -0.000836    |\n",
      "|    value_loss           | 0.307        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.62e+04      |\n",
      "|    ep_rew_mean          | 510           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 9             |\n",
      "|    time_elapsed         | 321           |\n",
      "|    total_timesteps      | 18432         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00010889891 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.94         |\n",
      "|    explained_variance   | 0.167         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 19.7          |\n",
      "|    n_updates            | 80            |\n",
      "|    policy_gradient_loss | 8.98e-05      |\n",
      "|    value_loss           | 59.8          |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.62e+04      |\n",
      "|    ep_rew_mean          | 510           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 10            |\n",
      "|    time_elapsed         | 360           |\n",
      "|    total_timesteps      | 20480         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00014846769 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.94         |\n",
      "|    explained_variance   | 0.372         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 3.97          |\n",
      "|    n_updates            | 90            |\n",
      "|    policy_gradient_loss | -5.56e-05     |\n",
      "|    value_loss           | 33.3          |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.62e+04      |\n",
      "|    ep_rew_mean          | 510           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 11            |\n",
      "|    time_elapsed         | 397           |\n",
      "|    total_timesteps      | 22528         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00046419672 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.94         |\n",
      "|    explained_variance   | 0.879         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.168         |\n",
      "|    n_updates            | 100           |\n",
      "|    policy_gradient_loss | -0.000715     |\n",
      "|    value_loss           | 0.392         |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.62e+04      |\n",
      "|    ep_rew_mean          | 510           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 12            |\n",
      "|    time_elapsed         | 434           |\n",
      "|    total_timesteps      | 24576         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00039633812 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.93         |\n",
      "|    explained_variance   | -0.0226       |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.074         |\n",
      "|    n_updates            | 110           |\n",
      "|    policy_gradient_loss | -0.000696     |\n",
      "|    value_loss           | 0.182         |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.62e+04      |\n",
      "|    ep_rew_mean          | 510           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 13            |\n",
      "|    time_elapsed         | 472           |\n",
      "|    total_timesteps      | 26624         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00043613417 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.93         |\n",
      "|    explained_variance   | 0.522         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 22.8          |\n",
      "|    n_updates            | 120           |\n",
      "|    policy_gradient_loss | -0.000186     |\n",
      "|    value_loss           | 64.4          |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.62e+04      |\n",
      "|    ep_rew_mean          | 510           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 14            |\n",
      "|    time_elapsed         | 509           |\n",
      "|    total_timesteps      | 28672         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00048331858 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.92         |\n",
      "|    explained_variance   | 0.49          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 8.01          |\n",
      "|    n_updates            | 130           |\n",
      "|    policy_gradient_loss | -0.000616     |\n",
      "|    value_loss           | 37.7          |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.62e+04      |\n",
      "|    ep_rew_mean          | 510           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 15            |\n",
      "|    time_elapsed         | 543           |\n",
      "|    total_timesteps      | 30720         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00036760324 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.92         |\n",
      "|    explained_variance   | 0.631         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.233         |\n",
      "|    n_updates            | 140           |\n",
      "|    policy_gradient_loss | -0.000417     |\n",
      "|    value_loss           | 0.787         |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.62e+04      |\n",
      "|    ep_rew_mean          | 493           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 16            |\n",
      "|    time_elapsed         | 578           |\n",
      "|    total_timesteps      | 32768         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00016678739 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.92         |\n",
      "|    explained_variance   | 0.596         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.07          |\n",
      "|    n_updates            | 150           |\n",
      "|    policy_gradient_loss | -7.33e-05     |\n",
      "|    value_loss           | 0.359         |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.62e+04      |\n",
      "|    ep_rew_mean          | 493           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 17            |\n",
      "|    time_elapsed         | 613           |\n",
      "|    total_timesteps      | 34816         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00022093591 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.92         |\n",
      "|    explained_variance   | 0.6           |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 16.4          |\n",
      "|    n_updates            | 160           |\n",
      "|    policy_gradient_loss | -3.34e-05     |\n",
      "|    value_loss           | 59            |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.62e+04      |\n",
      "|    ep_rew_mean          | 493           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 18            |\n",
      "|    time_elapsed         | 648           |\n",
      "|    total_timesteps      | 36864         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00012937843 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.91         |\n",
      "|    explained_variance   | 0.658         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 5.04          |\n",
      "|    n_updates            | 170           |\n",
      "|    policy_gradient_loss | 5.49e-05      |\n",
      "|    value_loss           | 46.3          |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.62e+04     |\n",
      "|    ep_rew_mean          | 493          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 19           |\n",
      "|    time_elapsed         | 683          |\n",
      "|    total_timesteps      | 38912        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0015601851 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.91        |\n",
      "|    explained_variance   | 0.7          |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0859       |\n",
      "|    n_updates            | 180          |\n",
      "|    policy_gradient_loss | -0.00152     |\n",
      "|    value_loss           | 0.273        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.62e+04      |\n",
      "|    ep_rew_mean          | 493           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 20            |\n",
      "|    time_elapsed         | 719           |\n",
      "|    total_timesteps      | 40960         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00067680184 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.92         |\n",
      "|    explained_variance   | -0.112        |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.0795        |\n",
      "|    n_updates            | 190           |\n",
      "|    policy_gradient_loss | -0.000823     |\n",
      "|    value_loss           | 0.206         |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.38e+04      |\n",
      "|    ep_rew_mean          | 632           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 21            |\n",
      "|    time_elapsed         | 756           |\n",
      "|    total_timesteps      | 43008         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00033866215 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.91         |\n",
      "|    explained_variance   | 0.783         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 11.9          |\n",
      "|    n_updates            | 200           |\n",
      "|    policy_gradient_loss | -0.000635     |\n",
      "|    value_loss           | 46.8          |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.38e+04     |\n",
      "|    ep_rew_mean          | 632          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 22           |\n",
      "|    time_elapsed         | 794          |\n",
      "|    total_timesteps      | 45056        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0003210063 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.9         |\n",
      "|    explained_variance   | 0.714        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 39.1         |\n",
      "|    n_updates            | 210          |\n",
      "|    policy_gradient_loss | -0.000342    |\n",
      "|    value_loss           | 95.6         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.38e+04     |\n",
      "|    ep_rew_mean          | 632          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 23           |\n",
      "|    time_elapsed         | 831          |\n",
      "|    total_timesteps      | 47104        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006135469 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.89        |\n",
      "|    explained_variance   | 0.766        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 70.5         |\n",
      "|    n_updates            | 220          |\n",
      "|    policy_gradient_loss | -0.000594    |\n",
      "|    value_loss           | 107          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.38e+04     |\n",
      "|    ep_rew_mean          | 632          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 24           |\n",
      "|    time_elapsed         | 869          |\n",
      "|    total_timesteps      | 49152        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0018878896 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.89        |\n",
      "|    explained_variance   | -0.121       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0362       |\n",
      "|    n_updates            | 230          |\n",
      "|    policy_gradient_loss | -0.001       |\n",
      "|    value_loss           | 0.741        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.38e+04     |\n",
      "|    ep_rew_mean          | 632          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 25           |\n",
      "|    time_elapsed         | 906          |\n",
      "|    total_timesteps      | 51200        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 5.178855e-05 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.89        |\n",
      "|    explained_variance   | 0.638        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 2            |\n",
      "|    n_updates            | 240          |\n",
      "|    policy_gradient_loss | -0.000159    |\n",
      "|    value_loss           | 31.9         |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.38e+04    |\n",
      "|    ep_rew_mean          | 632         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 56          |\n",
      "|    iterations           | 26          |\n",
      "|    time_elapsed         | 942         |\n",
      "|    total_timesteps      | 53248       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004040206 |\n",
      "|    clip_fraction        | 0           |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.88       |\n",
      "|    explained_variance   | 0.641       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.488       |\n",
      "|    n_updates            | 250         |\n",
      "|    policy_gradient_loss | -0.00315    |\n",
      "|    value_loss           | 1.72        |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.37e+04      |\n",
      "|    ep_rew_mean          | 791           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 27            |\n",
      "|    time_elapsed         | 978           |\n",
      "|    total_timesteps      | 55296         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00025260833 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.88         |\n",
      "|    explained_variance   | 0.761         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 21            |\n",
      "|    n_updates            | 260           |\n",
      "|    policy_gradient_loss | 4.94e-05      |\n",
      "|    value_loss           | 61.1          |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.37e+04      |\n",
      "|    ep_rew_mean          | 791           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 28            |\n",
      "|    time_elapsed         | 1015          |\n",
      "|    total_timesteps      | 57344         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00021736507 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.87         |\n",
      "|    explained_variance   | 0.819         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 28.4          |\n",
      "|    n_updates            | 270           |\n",
      "|    policy_gradient_loss | -0.000245     |\n",
      "|    value_loss           | 90.3          |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.37e+04      |\n",
      "|    ep_rew_mean          | 791           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 29            |\n",
      "|    time_elapsed         | 1049          |\n",
      "|    total_timesteps      | 59392         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00015981309 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.87         |\n",
      "|    explained_variance   | 0.817         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 12.2          |\n",
      "|    n_updates            | 280           |\n",
      "|    policy_gradient_loss | -0.000335     |\n",
      "|    value_loss           | 28.7          |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.37e+04    |\n",
      "|    ep_rew_mean          | 791         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 56          |\n",
      "|    iterations           | 30          |\n",
      "|    time_elapsed         | 1085        |\n",
      "|    total_timesteps      | 61440       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004287295 |\n",
      "|    clip_fraction        | 0           |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.87       |\n",
      "|    explained_variance   | 0.899       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.0903      |\n",
      "|    n_updates            | 290         |\n",
      "|    policy_gradient_loss | -0.00271    |\n",
      "|    value_loss           | 1.08        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.37e+04    |\n",
      "|    ep_rew_mean          | 791         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 56          |\n",
      "|    iterations           | 31          |\n",
      "|    time_elapsed         | 1119        |\n",
      "|    total_timesteps      | 63488       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001636615 |\n",
      "|    clip_fraction        | 0           |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.88       |\n",
      "|    explained_variance   | 0.81        |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.101       |\n",
      "|    n_updates            | 300         |\n",
      "|    policy_gradient_loss | -0.00124    |\n",
      "|    value_loss           | 1.3         |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.37e+04      |\n",
      "|    ep_rew_mean          | 791           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 32            |\n",
      "|    time_elapsed         | 1153          |\n",
      "|    total_timesteps      | 65536         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00016593709 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.88         |\n",
      "|    explained_variance   | 0.812         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 27.5          |\n",
      "|    n_updates            | 310           |\n",
      "|    policy_gradient_loss | -0.000401     |\n",
      "|    value_loss           | 54.1          |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.37e+04     |\n",
      "|    ep_rew_mean          | 791          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 33           |\n",
      "|    time_elapsed         | 1187         |\n",
      "|    total_timesteps      | 67584        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0022098706 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.88        |\n",
      "|    explained_variance   | 0.896        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.201        |\n",
      "|    n_updates            | 320          |\n",
      "|    policy_gradient_loss | -0.00237     |\n",
      "|    value_loss           | 0.386        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.37e+04     |\n",
      "|    ep_rew_mean          | 791          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 34           |\n",
      "|    time_elapsed         | 1222         |\n",
      "|    total_timesteps      | 69632        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0003633223 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.88        |\n",
      "|    explained_variance   | 0.0198       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.149        |\n",
      "|    n_updates            | 330          |\n",
      "|    policy_gradient_loss | -0.000291    |\n",
      "|    value_loss           | 0.275        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.37e+04     |\n",
      "|    ep_rew_mean          | 791          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 35           |\n",
      "|    time_elapsed         | 1256         |\n",
      "|    total_timesteps      | 71680        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0023977351 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.89        |\n",
      "|    explained_variance   | 0.932        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.186        |\n",
      "|    n_updates            | 340          |\n",
      "|    policy_gradient_loss | -0.00244     |\n",
      "|    value_loss           | 0.318        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.45e+04     |\n",
      "|    ep_rew_mean          | 825          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 36           |\n",
      "|    time_elapsed         | 1290         |\n",
      "|    total_timesteps      | 73728        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0002675297 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.88        |\n",
      "|    explained_variance   | 0.861        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 40.6         |\n",
      "|    n_updates            | 350          |\n",
      "|    policy_gradient_loss | -0.000147    |\n",
      "|    value_loss           | 68.7         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.45e+04     |\n",
      "|    ep_rew_mean          | 825          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 37           |\n",
      "|    time_elapsed         | 1324         |\n",
      "|    total_timesteps      | 75776        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0004350581 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.87        |\n",
      "|    explained_variance   | 0.861        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 26.5         |\n",
      "|    n_updates            | 360          |\n",
      "|    policy_gradient_loss | -0.000476    |\n",
      "|    value_loss           | 115          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.45e+04     |\n",
      "|    ep_rew_mean          | 825          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 38           |\n",
      "|    time_elapsed         | 1359         |\n",
      "|    total_timesteps      | 77824        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014017555 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.87        |\n",
      "|    explained_variance   | 0.862        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0647       |\n",
      "|    n_updates            | 370          |\n",
      "|    policy_gradient_loss | -0.00105     |\n",
      "|    value_loss           | 0.287        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.45e+04     |\n",
      "|    ep_rew_mean          | 825          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 39           |\n",
      "|    time_elapsed         | 1393         |\n",
      "|    total_timesteps      | 79872        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0021404978 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.87        |\n",
      "|    explained_variance   | 0.819        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.125        |\n",
      "|    n_updates            | 380          |\n",
      "|    policy_gradient_loss | -0.00152     |\n",
      "|    value_loss           | 0.353        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.45e+04     |\n",
      "|    ep_rew_mean          | 825          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 40           |\n",
      "|    time_elapsed         | 1428         |\n",
      "|    total_timesteps      | 81920        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008465318 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.88        |\n",
      "|    explained_variance   | 0.923        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.101        |\n",
      "|    n_updates            | 390          |\n",
      "|    policy_gradient_loss | -0.000505    |\n",
      "|    value_loss           | 0.346        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.45e+04     |\n",
      "|    ep_rew_mean          | 825          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 41           |\n",
      "|    time_elapsed         | 1460         |\n",
      "|    total_timesteps      | 83968        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 1.997422e-05 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.87        |\n",
      "|    explained_variance   | 0.92         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 29.2         |\n",
      "|    n_updates            | 400          |\n",
      "|    policy_gradient_loss | -2.56e-05    |\n",
      "|    value_loss           | 48.4         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.45e+04     |\n",
      "|    ep_rew_mean          | 825          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 42           |\n",
      "|    time_elapsed         | 1495         |\n",
      "|    total_timesteps      | 86016        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0035748007 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.88        |\n",
      "|    explained_variance   | 0.487        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.14         |\n",
      "|    n_updates            | 410          |\n",
      "|    policy_gradient_loss | -0.00245     |\n",
      "|    value_loss           | 0.353        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.45e+04     |\n",
      "|    ep_rew_mean          | 825          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 43           |\n",
      "|    time_elapsed         | 1530         |\n",
      "|    total_timesteps      | 88064        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0015392425 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.87        |\n",
      "|    explained_variance   | 0.022        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0668       |\n",
      "|    n_updates            | 420          |\n",
      "|    policy_gradient_loss | -0.000849    |\n",
      "|    value_loss           | 0.265        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.45e+04      |\n",
      "|    ep_rew_mean          | 825           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 44            |\n",
      "|    time_elapsed         | 1565          |\n",
      "|    total_timesteps      | 90112         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 5.0315517e-05 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.86         |\n",
      "|    explained_variance   | 0.698         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 34.5          |\n",
      "|    n_updates            | 430           |\n",
      "|    policy_gradient_loss | -0.000319     |\n",
      "|    value_loss           | 47.1          |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.45e+04     |\n",
      "|    ep_rew_mean          | 825          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 45           |\n",
      "|    time_elapsed         | 1601         |\n",
      "|    total_timesteps      | 92160        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005745158 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.85        |\n",
      "|    explained_variance   | 0.929        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 20.6         |\n",
      "|    n_updates            | 440          |\n",
      "|    policy_gradient_loss | -0.000308    |\n",
      "|    value_loss           | 59.9         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.45e+04     |\n",
      "|    ep_rew_mean          | 825          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 46           |\n",
      "|    time_elapsed         | 1636         |\n",
      "|    total_timesteps      | 94208        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0026364732 |\n",
      "|    clip_fraction        | 0.00127      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.86        |\n",
      "|    explained_variance   | 0.879        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0938       |\n",
      "|    n_updates            | 450          |\n",
      "|    policy_gradient_loss | -0.00183     |\n",
      "|    value_loss           | 0.486        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.45e+04     |\n",
      "|    ep_rew_mean          | 825          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 47           |\n",
      "|    time_elapsed         | 1671         |\n",
      "|    total_timesteps      | 96256        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006548436 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.87        |\n",
      "|    explained_variance   | 0.0896       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0819       |\n",
      "|    n_updates            | 460          |\n",
      "|    policy_gradient_loss | -0.000288    |\n",
      "|    value_loss           | 0.171        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.61e+04     |\n",
      "|    ep_rew_mean          | 768          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 48           |\n",
      "|    time_elapsed         | 1707         |\n",
      "|    total_timesteps      | 98304        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0019195722 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.87        |\n",
      "|    explained_variance   | 0.774        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0838       |\n",
      "|    n_updates            | 470          |\n",
      "|    policy_gradient_loss | -0.00134     |\n",
      "|    value_loss           | 0.191        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.61e+04     |\n",
      "|    ep_rew_mean          | 768          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 49           |\n",
      "|    time_elapsed         | 1743         |\n",
      "|    total_timesteps      | 100352       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 7.087487e-05 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.87        |\n",
      "|    explained_variance   | 0.893        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 36.1         |\n",
      "|    n_updates            | 480          |\n",
      "|    policy_gradient_loss | 0.000249     |\n",
      "|    value_loss           | 69.6         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.61e+04     |\n",
      "|    ep_rew_mean          | 768          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 50           |\n",
      "|    time_elapsed         | 1779         |\n",
      "|    total_timesteps      | 102400       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0029316568 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.87        |\n",
      "|    explained_variance   | 0.83         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.21         |\n",
      "|    n_updates            | 490          |\n",
      "|    policy_gradient_loss | -0.0018      |\n",
      "|    value_loss           | 1.81         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.61e+04     |\n",
      "|    ep_rew_mean          | 768          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 51           |\n",
      "|    time_elapsed         | 1814         |\n",
      "|    total_timesteps      | 104448       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0022207156 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.88        |\n",
      "|    explained_variance   | 0.885        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.294        |\n",
      "|    n_updates            | 500          |\n",
      "|    policy_gradient_loss | -0.00131     |\n",
      "|    value_loss           | 1.09         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.61e+04     |\n",
      "|    ep_rew_mean          | 768          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 52           |\n",
      "|    time_elapsed         | 1851         |\n",
      "|    total_timesteps      | 106496       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0003766673 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.89        |\n",
      "|    explained_variance   | 0.881        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.294        |\n",
      "|    n_updates            | 510          |\n",
      "|    policy_gradient_loss | -0.000546    |\n",
      "|    value_loss           | 1.91         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.61e+04     |\n",
      "|    ep_rew_mean          | 768          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 53           |\n",
      "|    time_elapsed         | 1886         |\n",
      "|    total_timesteps      | 108544       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 5.075548e-05 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.89        |\n",
      "|    explained_variance   | 0.933        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 33.3         |\n",
      "|    n_updates            | 520          |\n",
      "|    policy_gradient_loss | -0.000124    |\n",
      "|    value_loss           | 66           |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.61e+04     |\n",
      "|    ep_rew_mean          | 768          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 54           |\n",
      "|    time_elapsed         | 1922         |\n",
      "|    total_timesteps      | 110592       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0023478125 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.88        |\n",
      "|    explained_variance   | 0.573        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0968       |\n",
      "|    n_updates            | 530          |\n",
      "|    policy_gradient_loss | -0.00136     |\n",
      "|    value_loss           | 0.946        |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.61e+04    |\n",
      "|    ep_rew_mean          | 768         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 55          |\n",
      "|    time_elapsed         | 1958        |\n",
      "|    total_timesteps      | 112640      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.000844729 |\n",
      "|    clip_fraction        | 0           |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.88       |\n",
      "|    explained_variance   | 0.937       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.371       |\n",
      "|    n_updates            | 540         |\n",
      "|    policy_gradient_loss | -0.0006     |\n",
      "|    value_loss           | 1.08        |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.61e+04      |\n",
      "|    ep_rew_mean          | 768           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 56            |\n",
      "|    time_elapsed         | 1993          |\n",
      "|    total_timesteps      | 114688        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00013972094 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.87         |\n",
      "|    explained_variance   | 0.893         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 4.98          |\n",
      "|    n_updates            | 550           |\n",
      "|    policy_gradient_loss | -6.23e-05     |\n",
      "|    value_loss           | 20            |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.61e+04      |\n",
      "|    ep_rew_mean          | 768           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 57            |\n",
      "|    time_elapsed         | 2028          |\n",
      "|    total_timesteps      | 116736        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00047176692 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.88         |\n",
      "|    explained_variance   | 0.944         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 27.9          |\n",
      "|    n_updates            | 560           |\n",
      "|    policy_gradient_loss | -0.00198      |\n",
      "|    value_loss           | 37.6          |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.61e+04     |\n",
      "|    ep_rew_mean          | 768          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 58           |\n",
      "|    time_elapsed         | 2064         |\n",
      "|    total_timesteps      | 118784       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0020032942 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.87        |\n",
      "|    explained_variance   | 0.725        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0852       |\n",
      "|    n_updates            | 570          |\n",
      "|    policy_gradient_loss | -0.00123     |\n",
      "|    value_loss           | 0.265        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.72e+04     |\n",
      "|    ep_rew_mean          | 771          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 59           |\n",
      "|    time_elapsed         | 2100         |\n",
      "|    total_timesteps      | 120832       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014797234 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.87        |\n",
      "|    explained_variance   | 0.942        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0782       |\n",
      "|    n_updates            | 580          |\n",
      "|    policy_gradient_loss | -0.00074     |\n",
      "|    value_loss           | 0.427        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.72e+04      |\n",
      "|    ep_rew_mean          | 771           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 60            |\n",
      "|    time_elapsed         | 2135          |\n",
      "|    total_timesteps      | 122880        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00020036838 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.85         |\n",
      "|    explained_variance   | 0.897         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 9.23          |\n",
      "|    n_updates            | 590           |\n",
      "|    policy_gradient_loss | -0.000643     |\n",
      "|    value_loss           | 101           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.72e+04      |\n",
      "|    ep_rew_mean          | 771           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 61            |\n",
      "|    time_elapsed         | 2171          |\n",
      "|    total_timesteps      | 124928        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00016480865 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.87         |\n",
      "|    explained_variance   | 0.787         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 9.99          |\n",
      "|    n_updates            | 600           |\n",
      "|    policy_gradient_loss | -0.000768     |\n",
      "|    value_loss           | 47.3          |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.72e+04    |\n",
      "|    ep_rew_mean          | 771         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 62          |\n",
      "|    time_elapsed         | 2206        |\n",
      "|    total_timesteps      | 126976      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004771644 |\n",
      "|    clip_fraction        | 0           |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.87       |\n",
      "|    explained_variance   | 0.888       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.117       |\n",
      "|    n_updates            | 610         |\n",
      "|    policy_gradient_loss | -0.00195    |\n",
      "|    value_loss           | 0.408       |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.72e+04    |\n",
      "|    ep_rew_mean          | 771         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 63          |\n",
      "|    time_elapsed         | 2242        |\n",
      "|    total_timesteps      | 129024      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004888015 |\n",
      "|    clip_fraction        | 0           |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.87       |\n",
      "|    explained_variance   | 0.908       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.286       |\n",
      "|    n_updates            | 620         |\n",
      "|    policy_gradient_loss | -0.00231    |\n",
      "|    value_loss           | 0.842       |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.72e+04      |\n",
      "|    ep_rew_mean          | 771           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 64            |\n",
      "|    time_elapsed         | 2277          |\n",
      "|    total_timesteps      | 131072        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00017689358 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.86         |\n",
      "|    explained_variance   | 0.925         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 27.4          |\n",
      "|    n_updates            | 630           |\n",
      "|    policy_gradient_loss | -8.82e-05     |\n",
      "|    value_loss           | 70.3          |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.72e+04     |\n",
      "|    ep_rew_mean          | 771          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 65           |\n",
      "|    time_elapsed         | 2312         |\n",
      "|    total_timesteps      | 133120       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0024139648 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.86        |\n",
      "|    explained_variance   | 0.804        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.373        |\n",
      "|    n_updates            | 640          |\n",
      "|    policy_gradient_loss | -0.00137     |\n",
      "|    value_loss           | 0.521        |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.72e+04    |\n",
      "|    ep_rew_mean          | 771         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 66          |\n",
      "|    time_elapsed         | 2348        |\n",
      "|    total_timesteps      | 135168      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004467798 |\n",
      "|    clip_fraction        | 0           |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.87       |\n",
      "|    explained_variance   | 0.807       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.136       |\n",
      "|    n_updates            | 650         |\n",
      "|    policy_gradient_loss | -0.00324    |\n",
      "|    value_loss           | 0.458       |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.72e+04     |\n",
      "|    ep_rew_mean          | 771          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 67           |\n",
      "|    time_elapsed         | 2383         |\n",
      "|    total_timesteps      | 137216       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0058765947 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.86        |\n",
      "|    explained_variance   | 0.887        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.108        |\n",
      "|    n_updates            | 660          |\n",
      "|    policy_gradient_loss | -0.00315     |\n",
      "|    value_loss           | 0.346        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.72e+04     |\n",
      "|    ep_rew_mean          | 771          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 68           |\n",
      "|    time_elapsed         | 2419         |\n",
      "|    total_timesteps      | 139264       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006232923 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.83        |\n",
      "|    explained_variance   | 0.944        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 77.6         |\n",
      "|    n_updates            | 670          |\n",
      "|    policy_gradient_loss | -0.00272     |\n",
      "|    value_loss           | 66           |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.72e+04      |\n",
      "|    ep_rew_mean          | 771           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 69            |\n",
      "|    time_elapsed         | 2455          |\n",
      "|    total_timesteps      | 141312        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00015999796 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.84         |\n",
      "|    explained_variance   | 0.698         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 27.1          |\n",
      "|    n_updates            | 680           |\n",
      "|    policy_gradient_loss | -0.00021      |\n",
      "|    value_loss           | 51            |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.72e+04    |\n",
      "|    ep_rew_mean          | 771         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 70          |\n",
      "|    time_elapsed         | 2490        |\n",
      "|    total_timesteps      | 143360      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008620543 |\n",
      "|    clip_fraction        | 0.00313     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.84       |\n",
      "|    explained_variance   | 0.854       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.155       |\n",
      "|    n_updates            | 690         |\n",
      "|    policy_gradient_loss | -0.00427    |\n",
      "|    value_loss           | 0.43        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.81e+04    |\n",
      "|    ep_rew_mean          | 756         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 71          |\n",
      "|    time_elapsed         | 2526        |\n",
      "|    total_timesteps      | 145408      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007837479 |\n",
      "|    clip_fraction        | 0.0135      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.84       |\n",
      "|    explained_variance   | 0.624       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.0693      |\n",
      "|    n_updates            | 700         |\n",
      "|    policy_gradient_loss | -0.00414    |\n",
      "|    value_loss           | 0.234       |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.81e+04      |\n",
      "|    ep_rew_mean          | 756           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 72            |\n",
      "|    time_elapsed         | 2561          |\n",
      "|    total_timesteps      | 147456        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00023764884 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.83         |\n",
      "|    explained_variance   | 0.95          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 50.1          |\n",
      "|    n_updates            | 710           |\n",
      "|    policy_gradient_loss | -0.000471     |\n",
      "|    value_loss           | 53.7          |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.81e+04    |\n",
      "|    ep_rew_mean          | 756         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 73          |\n",
      "|    time_elapsed         | 2596        |\n",
      "|    total_timesteps      | 149504      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.003828683 |\n",
      "|    clip_fraction        | 0           |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.83       |\n",
      "|    explained_variance   | 0.904       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.322       |\n",
      "|    n_updates            | 720         |\n",
      "|    policy_gradient_loss | -0.00309    |\n",
      "|    value_loss           | 0.422       |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.81e+04     |\n",
      "|    ep_rew_mean          | 756          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 74           |\n",
      "|    time_elapsed         | 2631         |\n",
      "|    total_timesteps      | 151552       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0044707214 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.82        |\n",
      "|    explained_variance   | 0.614        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0636       |\n",
      "|    n_updates            | 730          |\n",
      "|    policy_gradient_loss | -0.00324     |\n",
      "|    value_loss           | 0.264        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.81e+04     |\n",
      "|    ep_rew_mean          | 756          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 75           |\n",
      "|    time_elapsed         | 2666         |\n",
      "|    total_timesteps      | 153600       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0028213114 |\n",
      "|    clip_fraction        | 0.00122      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.81        |\n",
      "|    explained_variance   | 0.0651       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0862       |\n",
      "|    n_updates            | 740          |\n",
      "|    policy_gradient_loss | -0.00219     |\n",
      "|    value_loss           | 0.216        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.81e+04      |\n",
      "|    ep_rew_mean          | 756           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 76            |\n",
      "|    time_elapsed         | 2701          |\n",
      "|    total_timesteps      | 155648        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00048834836 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.78         |\n",
      "|    explained_variance   | 0.952         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 26.9          |\n",
      "|    n_updates            | 750           |\n",
      "|    policy_gradient_loss | -0.000817     |\n",
      "|    value_loss           | 96.2          |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.81e+04     |\n",
      "|    ep_rew_mean          | 756          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 77           |\n",
      "|    time_elapsed         | 2736         |\n",
      "|    total_timesteps      | 157696       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0018187723 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.79        |\n",
      "|    explained_variance   | -0.262       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.186        |\n",
      "|    n_updates            | 760          |\n",
      "|    policy_gradient_loss | -0.00131     |\n",
      "|    value_loss           | 0.459        |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.81e+04    |\n",
      "|    ep_rew_mean          | 756         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 78          |\n",
      "|    time_elapsed         | 2771        |\n",
      "|    total_timesteps      | 159744      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007637464 |\n",
      "|    clip_fraction        | 0           |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.77       |\n",
      "|    explained_variance   | -0.0914     |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.106       |\n",
      "|    n_updates            | 770         |\n",
      "|    policy_gradient_loss | -0.00387    |\n",
      "|    value_loss           | 0.441       |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.79e+04     |\n",
      "|    ep_rew_mean          | 726          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 79           |\n",
      "|    time_elapsed         | 2807         |\n",
      "|    total_timesteps      | 161792       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011832624 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.76        |\n",
      "|    explained_variance   | -0.23        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.139        |\n",
      "|    n_updates            | 780          |\n",
      "|    policy_gradient_loss | -0.000689    |\n",
      "|    value_loss           | 0.291        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.79e+04      |\n",
      "|    ep_rew_mean          | 726           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 80            |\n",
      "|    time_elapsed         | 2842          |\n",
      "|    total_timesteps      | 163840        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00036262488 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.73         |\n",
      "|    explained_variance   | 0.97          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 7.59          |\n",
      "|    n_updates            | 790           |\n",
      "|    policy_gradient_loss | -0.000987     |\n",
      "|    value_loss           | 56.3          |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.79e+04    |\n",
      "|    ep_rew_mean          | 726         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 81          |\n",
      "|    time_elapsed         | 2877        |\n",
      "|    total_timesteps      | 165888      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009281939 |\n",
      "|    clip_fraction        | 0.00122     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.76       |\n",
      "|    explained_variance   | -0.297      |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.133       |\n",
      "|    n_updates            | 800         |\n",
      "|    policy_gradient_loss | -0.00243    |\n",
      "|    value_loss           | 0.262       |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.79e+04     |\n",
      "|    ep_rew_mean          | 726          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 82           |\n",
      "|    time_elapsed         | 2912         |\n",
      "|    total_timesteps      | 167936       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0051096166 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.75        |\n",
      "|    explained_variance   | -0.16        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0579       |\n",
      "|    n_updates            | 810          |\n",
      "|    policy_gradient_loss | -0.00237     |\n",
      "|    value_loss           | 0.246        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.79e+04     |\n",
      "|    ep_rew_mean          | 726          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 83           |\n",
      "|    time_elapsed         | 2948         |\n",
      "|    total_timesteps      | 169984       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006515273 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.74        |\n",
      "|    explained_variance   | -0.0994      |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.157        |\n",
      "|    n_updates            | 820          |\n",
      "|    policy_gradient_loss | -0.000638    |\n",
      "|    value_loss           | 0.247        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.79e+04      |\n",
      "|    ep_rew_mean          | 726           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 84            |\n",
      "|    time_elapsed         | 2983          |\n",
      "|    total_timesteps      | 172032        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00030575495 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.71         |\n",
      "|    explained_variance   | 0.956         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 14.5          |\n",
      "|    n_updates            | 830           |\n",
      "|    policy_gradient_loss | -0.000676     |\n",
      "|    value_loss           | 81.4          |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.79e+04     |\n",
      "|    ep_rew_mean          | 726          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 85           |\n",
      "|    time_elapsed         | 3018         |\n",
      "|    total_timesteps      | 174080       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0067701708 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.73        |\n",
      "|    explained_variance   | 0.866        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.238        |\n",
      "|    n_updates            | 840          |\n",
      "|    policy_gradient_loss | -0.0033      |\n",
      "|    value_loss           | 0.396        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.79e+04     |\n",
      "|    ep_rew_mean          | 726          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 86           |\n",
      "|    time_elapsed         | 3055         |\n",
      "|    total_timesteps      | 176128       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0036652684 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.73        |\n",
      "|    explained_variance   | -0.02        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.142        |\n",
      "|    n_updates            | 850          |\n",
      "|    policy_gradient_loss | -0.00184     |\n",
      "|    value_loss           | 0.327        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.77e+04      |\n",
      "|    ep_rew_mean          | 700           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 87            |\n",
      "|    time_elapsed         | 3090          |\n",
      "|    total_timesteps      | 178176        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00037420864 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.72         |\n",
      "|    explained_variance   | 0.458         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.109         |\n",
      "|    n_updates            | 860           |\n",
      "|    policy_gradient_loss | -9.84e-05     |\n",
      "|    value_loss           | 0.226         |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.77e+04      |\n",
      "|    ep_rew_mean          | 700           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 88            |\n",
      "|    time_elapsed         | 3126          |\n",
      "|    total_timesteps      | 180224        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00029017622 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.66         |\n",
      "|    explained_variance   | 0.944         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 69.4          |\n",
      "|    n_updates            | 870           |\n",
      "|    policy_gradient_loss | -5.56e-05     |\n",
      "|    value_loss           | 161           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.77e+04     |\n",
      "|    ep_rew_mean          | 700          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 89           |\n",
      "|    time_elapsed         | 3163         |\n",
      "|    total_timesteps      | 182272       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012081778 |\n",
      "|    clip_fraction        | 0.00176      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.7         |\n",
      "|    explained_variance   | 0.769        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 63.3         |\n",
      "|    n_updates            | 880          |\n",
      "|    policy_gradient_loss | -0.001       |\n",
      "|    value_loss           | 62           |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.77e+04    |\n",
      "|    ep_rew_mean          | 700         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 90          |\n",
      "|    time_elapsed         | 3198        |\n",
      "|    total_timesteps      | 184320      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006663149 |\n",
      "|    clip_fraction        | 0.00649     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.71       |\n",
      "|    explained_variance   | 0.218       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.0651      |\n",
      "|    n_updates            | 890         |\n",
      "|    policy_gradient_loss | -0.00206    |\n",
      "|    value_loss           | 1.03        |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.77e+04     |\n",
      "|    ep_rew_mean          | 700          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 91           |\n",
      "|    time_elapsed         | 3234         |\n",
      "|    total_timesteps      | 186368       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0015178757 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.71        |\n",
      "|    explained_variance   | 0.08         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.164        |\n",
      "|    n_updates            | 900          |\n",
      "|    policy_gradient_loss | -0.000464    |\n",
      "|    value_loss           | 0.846        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.77e+04      |\n",
      "|    ep_rew_mean          | 700           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 92            |\n",
      "|    time_elapsed         | 3270          |\n",
      "|    total_timesteps      | 188416        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00042108994 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.68         |\n",
      "|    explained_variance   | 0.96          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 11.2          |\n",
      "|    n_updates            | 910           |\n",
      "|    policy_gradient_loss | -0.0006       |\n",
      "|    value_loss           | 44.2          |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.77e+04     |\n",
      "|    ep_rew_mean          | 700          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 93           |\n",
      "|    time_elapsed         | 3306         |\n",
      "|    total_timesteps      | 190464       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0017983642 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.7         |\n",
      "|    explained_variance   | 0.0407       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.101        |\n",
      "|    n_updates            | 920          |\n",
      "|    policy_gradient_loss | -0.00183     |\n",
      "|    value_loss           | 0.352        |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.77e+04    |\n",
      "|    ep_rew_mean          | 700         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 94          |\n",
      "|    time_elapsed         | 3342        |\n",
      "|    total_timesteps      | 192512      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009170841 |\n",
      "|    clip_fraction        | 0.00625     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.7        |\n",
      "|    explained_variance   | 0.0246      |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.107       |\n",
      "|    n_updates            | 930         |\n",
      "|    policy_gradient_loss | -0.00485    |\n",
      "|    value_loss           | 0.308       |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.76e+04     |\n",
      "|    ep_rew_mean          | 716          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 95           |\n",
      "|    time_elapsed         | 3378         |\n",
      "|    total_timesteps      | 194560       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0053140684 |\n",
      "|    clip_fraction        | 0.0146       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.68        |\n",
      "|    explained_variance   | 0.129        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.143        |\n",
      "|    n_updates            | 940          |\n",
      "|    policy_gradient_loss | -0.00389     |\n",
      "|    value_loss           | 0.167        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.76e+04      |\n",
      "|    ep_rew_mean          | 716           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 96            |\n",
      "|    time_elapsed         | 3414          |\n",
      "|    total_timesteps      | 196608        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00063435733 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.64         |\n",
      "|    explained_variance   | 0.979         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 8.91          |\n",
      "|    n_updates            | 950           |\n",
      "|    policy_gradient_loss | -0.000636     |\n",
      "|    value_loss           | 35.1          |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.76e+04     |\n",
      "|    ep_rew_mean          | 716          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 97           |\n",
      "|    time_elapsed         | 3449         |\n",
      "|    total_timesteps      | 198656       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0023861644 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.65        |\n",
      "|    explained_variance   | 0.224        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0524       |\n",
      "|    n_updates            | 960          |\n",
      "|    policy_gradient_loss | -0.00156     |\n",
      "|    value_loss           | 0.209        |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.76e+04    |\n",
      "|    ep_rew_mean          | 716         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 98          |\n",
      "|    time_elapsed         | 3485        |\n",
      "|    total_timesteps      | 200704      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007846017 |\n",
      "|    clip_fraction        | 0           |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.65       |\n",
      "|    explained_variance   | 0.0909      |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.0711      |\n",
      "|    n_updates            | 970         |\n",
      "|    policy_gradient_loss | -0.0022     |\n",
      "|    value_loss           | 0.188       |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.76e+04    |\n",
      "|    ep_rew_mean          | 716         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 99          |\n",
      "|    time_elapsed         | 3520        |\n",
      "|    total_timesteps      | 202752      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.000262754 |\n",
      "|    clip_fraction        | 0           |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.66       |\n",
      "|    explained_variance   | 0.195       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.089       |\n",
      "|    n_updates            | 980         |\n",
      "|    policy_gradient_loss | -6.89e-05   |\n",
      "|    value_loss           | 0.192       |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.76e+04      |\n",
      "|    ep_rew_mean          | 716           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 100           |\n",
      "|    time_elapsed         | 3556          |\n",
      "|    total_timesteps      | 204800        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00016431717 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.6          |\n",
      "|    explained_variance   | 0.937         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 62.2          |\n",
      "|    n_updates            | 990           |\n",
      "|    policy_gradient_loss | -0.000387     |\n",
      "|    value_loss           | 112           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.76e+04     |\n",
      "|    ep_rew_mean          | 716          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 101          |\n",
      "|    time_elapsed         | 3591         |\n",
      "|    total_timesteps      | 206848       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0065450747 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.63        |\n",
      "|    explained_variance   | -0.124       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.227        |\n",
      "|    n_updates            | 1000         |\n",
      "|    policy_gradient_loss | -0.00166     |\n",
      "|    value_loss           | 0.597        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.76e+04     |\n",
      "|    ep_rew_mean          | 716          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 102          |\n",
      "|    time_elapsed         | 3627         |\n",
      "|    total_timesteps      | 208896       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0013650465 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.64        |\n",
      "|    explained_variance   | -0.153       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.244        |\n",
      "|    n_updates            | 1010         |\n",
      "|    policy_gradient_loss | -0.000724    |\n",
      "|    value_loss           | 0.647        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.76e+04     |\n",
      "|    ep_rew_mean          | 716          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 103          |\n",
      "|    time_elapsed         | 3663         |\n",
      "|    total_timesteps      | 210944       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0027097096 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.66        |\n",
      "|    explained_variance   | -0.0999      |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.107        |\n",
      "|    n_updates            | 1020         |\n",
      "|    policy_gradient_loss | -0.00115     |\n",
      "|    value_loss           | 0.387        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.76e+04     |\n",
      "|    ep_rew_mean          | 716          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 104          |\n",
      "|    time_elapsed         | 3698         |\n",
      "|    total_timesteps      | 212992       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0020019356 |\n",
      "|    clip_fraction        | 0.000146     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.63        |\n",
      "|    explained_variance   | 0.943        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 10.6         |\n",
      "|    n_updates            | 1030         |\n",
      "|    policy_gradient_loss | -0.00262     |\n",
      "|    value_loss           | 79.5         |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 1.76e+04   |\n",
      "|    ep_rew_mean          | 716        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 57         |\n",
      "|    iterations           | 105        |\n",
      "|    time_elapsed         | 3733       |\n",
      "|    total_timesteps      | 215040     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.00818659 |\n",
      "|    clip_fraction        | 0.00986    |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.66      |\n",
      "|    explained_variance   | 0.111      |\n",
      "|    learning_rate        | 1e-06      |\n",
      "|    loss                 | 0.0765     |\n",
      "|    n_updates            | 1040       |\n",
      "|    policy_gradient_loss | -0.00345   |\n",
      "|    value_loss           | 0.196      |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.76e+04     |\n",
      "|    ep_rew_mean          | 716          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 106          |\n",
      "|    time_elapsed         | 3769         |\n",
      "|    total_timesteps      | 217088       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0019840226 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.66        |\n",
      "|    explained_variance   | 0.176        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.122        |\n",
      "|    n_updates            | 1050         |\n",
      "|    policy_gradient_loss | -0.000631    |\n",
      "|    value_loss           | 0.237        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.81e+04     |\n",
      "|    ep_rew_mean          | 700          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 107          |\n",
      "|    time_elapsed         | 3804         |\n",
      "|    total_timesteps      | 219136       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006077535 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.64        |\n",
      "|    explained_variance   | 0.0593       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0827       |\n",
      "|    n_updates            | 1060         |\n",
      "|    policy_gradient_loss | -0.000586    |\n",
      "|    value_loss           | 0.223        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.81e+04      |\n",
      "|    ep_rew_mean          | 700           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 108           |\n",
      "|    time_elapsed         | 3841          |\n",
      "|    total_timesteps      | 221184        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00033487796 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.61         |\n",
      "|    explained_variance   | 0.977         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 29.5          |\n",
      "|    n_updates            | 1070          |\n",
      "|    policy_gradient_loss | -0.00072      |\n",
      "|    value_loss           | 68.5          |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.81e+04    |\n",
      "|    ep_rew_mean          | 700         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 109         |\n",
      "|    time_elapsed         | 3875        |\n",
      "|    total_timesteps      | 223232      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.003225992 |\n",
      "|    clip_fraction        | 0           |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.64       |\n",
      "|    explained_variance   | 0.147       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.0505      |\n",
      "|    n_updates            | 1080        |\n",
      "|    policy_gradient_loss | -0.00147    |\n",
      "|    value_loss           | 0.228       |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.81e+04      |\n",
      "|    ep_rew_mean          | 700           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 110           |\n",
      "|    time_elapsed         | 3911          |\n",
      "|    total_timesteps      | 225280        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00083688064 |\n",
      "|    clip_fraction        | 0.000293      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.61         |\n",
      "|    explained_variance   | 0.767         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 38.4          |\n",
      "|    n_updates            | 1090          |\n",
      "|    policy_gradient_loss | -0.000117     |\n",
      "|    value_loss           | 81.7          |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.81e+04     |\n",
      "|    ep_rew_mean          | 700          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 111          |\n",
      "|    time_elapsed         | 3946         |\n",
      "|    total_timesteps      | 227328       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0059774616 |\n",
      "|    clip_fraction        | 0.0106       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.63        |\n",
      "|    explained_variance   | 0.32         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.137        |\n",
      "|    n_updates            | 1100         |\n",
      "|    policy_gradient_loss | -0.00267     |\n",
      "|    value_loss           | 0.651        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.81e+04      |\n",
      "|    ep_rew_mean          | 700           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 112           |\n",
      "|    time_elapsed         | 3981          |\n",
      "|    total_timesteps      | 229376        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00031284097 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.62         |\n",
      "|    explained_variance   | 0.966         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 23            |\n",
      "|    n_updates            | 1110          |\n",
      "|    policy_gradient_loss | -0.00105      |\n",
      "|    value_loss           | 61.2          |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.81e+04     |\n",
      "|    ep_rew_mean          | 700          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 113          |\n",
      "|    time_elapsed         | 4016         |\n",
      "|    total_timesteps      | 231424       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0040046433 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.64        |\n",
      "|    explained_variance   | 0.574        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0966       |\n",
      "|    n_updates            | 1120         |\n",
      "|    policy_gradient_loss | -0.00165     |\n",
      "|    value_loss           | 0.409        |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.81e+04    |\n",
      "|    ep_rew_mean          | 700         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 114         |\n",
      "|    time_elapsed         | 4051        |\n",
      "|    total_timesteps      | 233472      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007746046 |\n",
      "|    clip_fraction        | 0.00776     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.64       |\n",
      "|    explained_variance   | -0.00373    |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.12        |\n",
      "|    n_updates            | 1130        |\n",
      "|    policy_gradient_loss | -0.00348    |\n",
      "|    value_loss           | 0.24        |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.8e+04      |\n",
      "|    ep_rew_mean          | 687          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 115          |\n",
      "|    time_elapsed         | 4087         |\n",
      "|    total_timesteps      | 235520       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0041596238 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.63        |\n",
      "|    explained_variance   | 0.929        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.296        |\n",
      "|    n_updates            | 1140         |\n",
      "|    policy_gradient_loss | -0.00175     |\n",
      "|    value_loss           | 0.473        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.8e+04      |\n",
      "|    ep_rew_mean          | 687          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 116          |\n",
      "|    time_elapsed         | 4123         |\n",
      "|    total_timesteps      | 237568       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0033047483 |\n",
      "|    clip_fraction        | 0.000732     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.56        |\n",
      "|    explained_variance   | 0.973        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 21.1         |\n",
      "|    n_updates            | 1150         |\n",
      "|    policy_gradient_loss | -0.00309     |\n",
      "|    value_loss           | 41           |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.8e+04      |\n",
      "|    ep_rew_mean          | 687          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 117          |\n",
      "|    time_elapsed         | 4159         |\n",
      "|    total_timesteps      | 239616       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0018116548 |\n",
      "|    clip_fraction        | 0.0114       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.58        |\n",
      "|    explained_variance   | 0.756        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 15.3         |\n",
      "|    n_updates            | 1160         |\n",
      "|    policy_gradient_loss | 0.000388     |\n",
      "|    value_loss           | 65.3         |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.8e+04     |\n",
      "|    ep_rew_mean          | 687         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 118         |\n",
      "|    time_elapsed         | 4195        |\n",
      "|    total_timesteps      | 241664      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009344401 |\n",
      "|    clip_fraction        | 0.0144      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.56       |\n",
      "|    explained_variance   | 0.395       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.109       |\n",
      "|    n_updates            | 1170        |\n",
      "|    policy_gradient_loss | -0.00435    |\n",
      "|    value_loss           | 0.443       |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.8e+04     |\n",
      "|    ep_rew_mean          | 687         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 119         |\n",
      "|    time_elapsed         | 4230        |\n",
      "|    total_timesteps      | 243712      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001028619 |\n",
      "|    clip_fraction        | 0.00234     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.48       |\n",
      "|    explained_variance   | 0.946       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 30.2        |\n",
      "|    n_updates            | 1180        |\n",
      "|    policy_gradient_loss | 0.000629    |\n",
      "|    value_loss           | 36.4        |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.8e+04      |\n",
      "|    ep_rew_mean          | 687          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 120          |\n",
      "|    time_elapsed         | 4265         |\n",
      "|    total_timesteps      | 245760       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0002737776 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.49        |\n",
      "|    explained_variance   | 0.978        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 6.57         |\n",
      "|    n_updates            | 1190         |\n",
      "|    policy_gradient_loss | -0.000655    |\n",
      "|    value_loss           | 22.7         |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.8e+04       |\n",
      "|    ep_rew_mean          | 687           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 121           |\n",
      "|    time_elapsed         | 4301          |\n",
      "|    total_timesteps      | 247808        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00046640608 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.5          |\n",
      "|    explained_variance   | 0.805         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 42.3          |\n",
      "|    n_updates            | 1200          |\n",
      "|    policy_gradient_loss | -0.000315     |\n",
      "|    value_loss           | 63.2          |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.8e+04      |\n",
      "|    ep_rew_mean          | 687          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 122          |\n",
      "|    time_elapsed         | 4337         |\n",
      "|    total_timesteps      | 249856       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0053231334 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.5         |\n",
      "|    explained_variance   | 0.265        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.258        |\n",
      "|    n_updates            | 1210         |\n",
      "|    policy_gradient_loss | -0.00173     |\n",
      "|    value_loss           | 0.587        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.8e+04      |\n",
      "|    ep_rew_mean          | 687          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 123          |\n",
      "|    time_elapsed         | 4373         |\n",
      "|    total_timesteps      | 251904       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0003328001 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.47        |\n",
      "|    explained_variance   | 0.952        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 39.7         |\n",
      "|    n_updates            | 1220         |\n",
      "|    policy_gradient_loss | -0.000776    |\n",
      "|    value_loss           | 58.7         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.8e+04      |\n",
      "|    ep_rew_mean          | 687          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 124          |\n",
      "|    time_elapsed         | 4408         |\n",
      "|    total_timesteps      | 253952       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0020407587 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.5         |\n",
      "|    explained_variance   | -0.0815      |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.07         |\n",
      "|    n_updates            | 1230         |\n",
      "|    policy_gradient_loss | -0.000407    |\n",
      "|    value_loss           | 0.167        |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.8e+04     |\n",
      "|    ep_rew_mean          | 687         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 125         |\n",
      "|    time_elapsed         | 4444        |\n",
      "|    total_timesteps      | 256000      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.015883144 |\n",
      "|    clip_fraction        | 0.0635      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.44       |\n",
      "|    explained_variance   | 0.00316     |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.0625      |\n",
      "|    n_updates            | 1240        |\n",
      "|    policy_gradient_loss | -0.00833    |\n",
      "|    value_loss           | 0.174       |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.84e+04    |\n",
      "|    ep_rew_mean          | 694         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 126         |\n",
      "|    time_elapsed         | 4479        |\n",
      "|    total_timesteps      | 258048      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.003970587 |\n",
      "|    clip_fraction        | 0.0116      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.45       |\n",
      "|    explained_variance   | 0.773       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 5.31        |\n",
      "|    n_updates            | 1250        |\n",
      "|    policy_gradient_loss | 0.00115     |\n",
      "|    value_loss           | 54          |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.84e+04    |\n",
      "|    ep_rew_mean          | 694         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 127         |\n",
      "|    time_elapsed         | 4515        |\n",
      "|    total_timesteps      | 260096      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008021679 |\n",
      "|    clip_fraction        | 0.0471      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.37       |\n",
      "|    explained_variance   | 0.962       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 22.6        |\n",
      "|    n_updates            | 1260        |\n",
      "|    policy_gradient_loss | -0.00225    |\n",
      "|    value_loss           | 33          |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.84e+04     |\n",
      "|    ep_rew_mean          | 694          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 128          |\n",
      "|    time_elapsed         | 4551         |\n",
      "|    total_timesteps      | 262144       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0036926544 |\n",
      "|    clip_fraction        | 0.0129       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.43        |\n",
      "|    explained_variance   | 0.118        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0907       |\n",
      "|    n_updates            | 1270         |\n",
      "|    policy_gradient_loss | -0.00191     |\n",
      "|    value_loss           | 0.224        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.84e+04     |\n",
      "|    ep_rew_mean          | 694          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 129          |\n",
      "|    time_elapsed         | 4586         |\n",
      "|    total_timesteps      | 264192       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012470105 |\n",
      "|    clip_fraction        | 0.00146      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.46        |\n",
      "|    explained_variance   | 0.184        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0729       |\n",
      "|    n_updates            | 1280         |\n",
      "|    policy_gradient_loss | -0.00126     |\n",
      "|    value_loss           | 0.214        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.84e+04     |\n",
      "|    ep_rew_mean          | 694          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 130          |\n",
      "|    time_elapsed         | 4622         |\n",
      "|    total_timesteps      | 266240       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0030992527 |\n",
      "|    clip_fraction        | 0.00688      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.49        |\n",
      "|    explained_variance   | 0.614        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 50.6         |\n",
      "|    n_updates            | 1290         |\n",
      "|    policy_gradient_loss | 0.00139      |\n",
      "|    value_loss           | 57.2         |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.84e+04    |\n",
      "|    ep_rew_mean          | 694         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 131         |\n",
      "|    time_elapsed         | 4657        |\n",
      "|    total_timesteps      | 268288      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.000491274 |\n",
      "|    clip_fraction        | 0           |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.45       |\n",
      "|    explained_variance   | 0.954       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 11.2        |\n",
      "|    n_updates            | 1300        |\n",
      "|    policy_gradient_loss | 7.42e-05    |\n",
      "|    value_loss           | 73.3        |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.84e+04     |\n",
      "|    ep_rew_mean          | 694          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 132          |\n",
      "|    time_elapsed         | 4693         |\n",
      "|    total_timesteps      | 270336       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0035198163 |\n",
      "|    clip_fraction        | 0.00742      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.53        |\n",
      "|    explained_variance   | 0.764        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 39.7         |\n",
      "|    n_updates            | 1310         |\n",
      "|    policy_gradient_loss | -0.000733    |\n",
      "|    value_loss           | 51.2         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.84e+04     |\n",
      "|    ep_rew_mean          | 694          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 133          |\n",
      "|    time_elapsed         | 4728         |\n",
      "|    total_timesteps      | 272384       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0037851264 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.59        |\n",
      "|    explained_variance   | 0.39         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.282        |\n",
      "|    n_updates            | 1320         |\n",
      "|    policy_gradient_loss | -0.00154     |\n",
      "|    value_loss           | 0.495        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.84e+04     |\n",
      "|    ep_rew_mean          | 694          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 134          |\n",
      "|    time_elapsed         | 4763         |\n",
      "|    total_timesteps      | 274432       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011601939 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.61        |\n",
      "|    explained_variance   | 0.469        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.16         |\n",
      "|    n_updates            | 1330         |\n",
      "|    policy_gradient_loss | -0.000761    |\n",
      "|    value_loss           | 0.303        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.84e+04      |\n",
      "|    ep_rew_mean          | 694           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 135           |\n",
      "|    time_elapsed         | 4799          |\n",
      "|    total_timesteps      | 276480        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00015574889 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.53         |\n",
      "|    explained_variance   | 0.944         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 34            |\n",
      "|    n_updates            | 1340          |\n",
      "|    policy_gradient_loss | -0.000807     |\n",
      "|    value_loss           | 94.4          |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.84e+04     |\n",
      "|    ep_rew_mean          | 694          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 136          |\n",
      "|    time_elapsed         | 4834         |\n",
      "|    total_timesteps      | 278528       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011465825 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.58        |\n",
      "|    explained_variance   | 0.0119       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.229        |\n",
      "|    n_updates            | 1350         |\n",
      "|    policy_gradient_loss | -0.000332    |\n",
      "|    value_loss           | 0.48         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.84e+04     |\n",
      "|    ep_rew_mean          | 694          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 137          |\n",
      "|    time_elapsed         | 4870         |\n",
      "|    total_timesteps      | 280576       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0037404299 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.61        |\n",
      "|    explained_variance   | -0.011       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.242        |\n",
      "|    n_updates            | 1360         |\n",
      "|    policy_gradient_loss | -0.00132     |\n",
      "|    value_loss           | 0.436        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.88e+04     |\n",
      "|    ep_rew_mean          | 699          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 138          |\n",
      "|    time_elapsed         | 4906         |\n",
      "|    total_timesteps      | 282624       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0031028814 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.61        |\n",
      "|    explained_variance   | 0.104        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.147        |\n",
      "|    n_updates            | 1370         |\n",
      "|    policy_gradient_loss | -0.00182     |\n",
      "|    value_loss           | 0.337        |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.88e+04    |\n",
      "|    ep_rew_mean          | 699         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 139         |\n",
      "|    time_elapsed         | 4942        |\n",
      "|    total_timesteps      | 284672      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007294269 |\n",
      "|    clip_fraction        | 0.0407      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.47       |\n",
      "|    explained_variance   | 0.956       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 27.5        |\n",
      "|    n_updates            | 1380        |\n",
      "|    policy_gradient_loss | 8.09e-05    |\n",
      "|    value_loss           | 35.6        |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.88e+04     |\n",
      "|    ep_rew_mean          | 699          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 140          |\n",
      "|    time_elapsed         | 4978         |\n",
      "|    total_timesteps      | 286720       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006849321 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.41        |\n",
      "|    explained_variance   | 0.959        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 16.1         |\n",
      "|    n_updates            | 1390         |\n",
      "|    policy_gradient_loss | 0.000244     |\n",
      "|    value_loss           | 101          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.88e+04    |\n",
      "|    ep_rew_mean          | 699         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 141         |\n",
      "|    time_elapsed         | 5013        |\n",
      "|    total_timesteps      | 288768      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006418345 |\n",
      "|    clip_fraction        | 0.0204      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.51       |\n",
      "|    explained_variance   | 0.763       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 44.9        |\n",
      "|    n_updates            | 1400        |\n",
      "|    policy_gradient_loss | 0.000276    |\n",
      "|    value_loss           | 50.8        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.88e+04    |\n",
      "|    ep_rew_mean          | 699         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 142         |\n",
      "|    time_elapsed         | 5049        |\n",
      "|    total_timesteps      | 290816      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006311885 |\n",
      "|    clip_fraction        | 0.0204      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.52       |\n",
      "|    explained_variance   | 0.555       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.154       |\n",
      "|    n_updates            | 1410        |\n",
      "|    policy_gradient_loss | -0.00397    |\n",
      "|    value_loss           | 0.361       |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.83e+04     |\n",
      "|    ep_rew_mean          | 718          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 143          |\n",
      "|    time_elapsed         | 5084         |\n",
      "|    total_timesteps      | 292864       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0036124708 |\n",
      "|    clip_fraction        | 0.00112      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.51        |\n",
      "|    explained_variance   | 0.383        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.148        |\n",
      "|    n_updates            | 1420         |\n",
      "|    policy_gradient_loss | -0.00122     |\n",
      "|    value_loss           | 0.393        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.83e+04     |\n",
      "|    ep_rew_mean          | 718          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 144          |\n",
      "|    time_elapsed         | 5120         |\n",
      "|    total_timesteps      | 294912       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0072164326 |\n",
      "|    clip_fraction        | 0.0292       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.41        |\n",
      "|    explained_variance   | 0.977        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 31.9         |\n",
      "|    n_updates            | 1430         |\n",
      "|    policy_gradient_loss | 0.000133     |\n",
      "|    value_loss           | 37           |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.83e+04    |\n",
      "|    ep_rew_mean          | 718         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 145         |\n",
      "|    time_elapsed         | 5155        |\n",
      "|    total_timesteps      | 296960      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004997646 |\n",
      "|    clip_fraction        | 0.0156      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.47       |\n",
      "|    explained_variance   | 0.798       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 8.67        |\n",
      "|    n_updates            | 1440        |\n",
      "|    policy_gradient_loss | -1.72e-05   |\n",
      "|    value_loss           | 46.5        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.83e+04    |\n",
      "|    ep_rew_mean          | 718         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 146         |\n",
      "|    time_elapsed         | 5191        |\n",
      "|    total_timesteps      | 299008      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.010296438 |\n",
      "|    clip_fraction        | 0.002       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.5        |\n",
      "|    explained_variance   | 0.504       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.113       |\n",
      "|    n_updates            | 1450        |\n",
      "|    policy_gradient_loss | -0.00214    |\n",
      "|    value_loss           | 0.219       |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.77e+04     |\n",
      "|    ep_rew_mean          | 719          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 147          |\n",
      "|    time_elapsed         | 5227         |\n",
      "|    total_timesteps      | 301056       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0030808374 |\n",
      "|    clip_fraction        | 0.00225      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.51        |\n",
      "|    explained_variance   | 0.288        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.141        |\n",
      "|    n_updates            | 1460         |\n",
      "|    policy_gradient_loss | -0.000461    |\n",
      "|    value_loss           | 0.219        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.68e+04     |\n",
      "|    ep_rew_mean          | 740          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 148          |\n",
      "|    time_elapsed         | 5263         |\n",
      "|    total_timesteps      | 303104       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0023163254 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.39        |\n",
      "|    explained_variance   | 0.937        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 89.5         |\n",
      "|    n_updates            | 1470         |\n",
      "|    policy_gradient_loss | -0.00143     |\n",
      "|    value_loss           | 196          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.68e+04      |\n",
      "|    ep_rew_mean          | 740           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 149           |\n",
      "|    time_elapsed         | 5298          |\n",
      "|    total_timesteps      | 305152        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00025619016 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.32         |\n",
      "|    explained_variance   | 0.9           |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 188           |\n",
      "|    n_updates            | 1480          |\n",
      "|    policy_gradient_loss | -0.000807     |\n",
      "|    value_loss           | 351           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.68e+04     |\n",
      "|    ep_rew_mean          | 740          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 150          |\n",
      "|    time_elapsed         | 5333         |\n",
      "|    total_timesteps      | 307200       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0019795285 |\n",
      "|    clip_fraction        | 0.000244     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.38        |\n",
      "|    explained_variance   | 0.989        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 5.42         |\n",
      "|    n_updates            | 1490         |\n",
      "|    policy_gradient_loss | -0.000735    |\n",
      "|    value_loss           | 8.58         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.68e+04     |\n",
      "|    ep_rew_mean          | 740          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 151          |\n",
      "|    time_elapsed         | 5369         |\n",
      "|    total_timesteps      | 309248       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0038547586 |\n",
      "|    clip_fraction        | 0.0184       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.44        |\n",
      "|    explained_variance   | 0.734        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 2.86         |\n",
      "|    n_updates            | 1500         |\n",
      "|    policy_gradient_loss | 0.000843     |\n",
      "|    value_loss           | 51.1         |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.64e+04    |\n",
      "|    ep_rew_mean          | 759         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 152         |\n",
      "|    time_elapsed         | 5404        |\n",
      "|    total_timesteps      | 311296      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006646519 |\n",
      "|    clip_fraction        | 0.016       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.5        |\n",
      "|    explained_variance   | 0.154       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.138       |\n",
      "|    n_updates            | 1510        |\n",
      "|    policy_gradient_loss | -0.00275    |\n",
      "|    value_loss           | 0.233       |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.64e+04     |\n",
      "|    ep_rew_mean          | 759          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 153          |\n",
      "|    time_elapsed         | 5440         |\n",
      "|    total_timesteps      | 313344       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0028261873 |\n",
      "|    clip_fraction        | 0.0215       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.41        |\n",
      "|    explained_variance   | 0.982        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 9.43         |\n",
      "|    n_updates            | 1520         |\n",
      "|    policy_gradient_loss | 2.75e-05     |\n",
      "|    value_loss           | 25.2         |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.64e+04      |\n",
      "|    ep_rew_mean          | 759           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 154           |\n",
      "|    time_elapsed         | 5475          |\n",
      "|    total_timesteps      | 315392        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00022845904 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.4          |\n",
      "|    explained_variance   | 0.912         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 28.8          |\n",
      "|    n_updates            | 1530          |\n",
      "|    policy_gradient_loss | -0.000854     |\n",
      "|    value_loss           | 54.3          |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.64e+04     |\n",
      "|    ep_rew_mean          | 759          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 155          |\n",
      "|    time_elapsed         | 5510         |\n",
      "|    total_timesteps      | 317440       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0022912328 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.44        |\n",
      "|    explained_variance   | 0.139        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.179        |\n",
      "|    n_updates            | 1540         |\n",
      "|    policy_gradient_loss | -0.00108     |\n",
      "|    value_loss           | 0.337        |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.64e+04    |\n",
      "|    ep_rew_mean          | 759         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 156         |\n",
      "|    time_elapsed         | 5546        |\n",
      "|    total_timesteps      | 319488      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.005278347 |\n",
      "|    clip_fraction        | 0.00591     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.43       |\n",
      "|    explained_variance   | -0.0627     |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.275       |\n",
      "|    n_updates            | 1550        |\n",
      "|    policy_gradient_loss | -0.00209    |\n",
      "|    value_loss           | 0.276       |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.64e+04      |\n",
      "|    ep_rew_mean          | 759           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 157           |\n",
      "|    time_elapsed         | 5583          |\n",
      "|    total_timesteps      | 321536        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00039481965 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.33         |\n",
      "|    explained_variance   | 0.963         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 12.7          |\n",
      "|    n_updates            | 1560          |\n",
      "|    policy_gradient_loss | -0.000735     |\n",
      "|    value_loss           | 68.1          |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.64e+04     |\n",
      "|    ep_rew_mean          | 759          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 158          |\n",
      "|    time_elapsed         | 5616         |\n",
      "|    total_timesteps      | 323584       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0072179255 |\n",
      "|    clip_fraction        | 0.0175       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.43        |\n",
      "|    explained_variance   | 0.702        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 32.3         |\n",
      "|    n_updates            | 1570         |\n",
      "|    policy_gradient_loss | -0.000534    |\n",
      "|    value_loss           | 39.3         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.64e+04     |\n",
      "|    ep_rew_mean          | 759          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 159          |\n",
      "|    time_elapsed         | 5652         |\n",
      "|    total_timesteps      | 325632       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0056525953 |\n",
      "|    clip_fraction        | 0.0312       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.52        |\n",
      "|    explained_variance   | -0.191       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.215        |\n",
      "|    n_updates            | 1580         |\n",
      "|    policy_gradient_loss | -0.00284     |\n",
      "|    value_loss           | 0.61         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.64e+04     |\n",
      "|    ep_rew_mean          | 757          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 160          |\n",
      "|    time_elapsed         | 5687         |\n",
      "|    total_timesteps      | 327680       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0038557602 |\n",
      "|    clip_fraction        | 0.0042       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.57        |\n",
      "|    explained_variance   | -0.404       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.231        |\n",
      "|    n_updates            | 1590         |\n",
      "|    policy_gradient_loss | -0.0016      |\n",
      "|    value_loss           | 0.406        |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.64e+04    |\n",
      "|    ep_rew_mean          | 757         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 161         |\n",
      "|    time_elapsed         | 5722        |\n",
      "|    total_timesteps      | 329728      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009199668 |\n",
      "|    clip_fraction        | 0.0349      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.41       |\n",
      "|    explained_variance   | 0.966       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 18.7        |\n",
      "|    n_updates            | 1600        |\n",
      "|    policy_gradient_loss | 0.00243     |\n",
      "|    value_loss           | 39.6        |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.64e+04     |\n",
      "|    ep_rew_mean          | 757          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 162          |\n",
      "|    time_elapsed         | 5758         |\n",
      "|    total_timesteps      | 331776       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014474003 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.44        |\n",
      "|    explained_variance   | 0.121        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.237        |\n",
      "|    n_updates            | 1610         |\n",
      "|    policy_gradient_loss | -0.00033     |\n",
      "|    value_loss           | 0.583        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.64e+04     |\n",
      "|    ep_rew_mean          | 757          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 163          |\n",
      "|    time_elapsed         | 5793         |\n",
      "|    total_timesteps      | 333824       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0015836192 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.46        |\n",
      "|    explained_variance   | 0.261        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0852       |\n",
      "|    n_updates            | 1620         |\n",
      "|    policy_gradient_loss | -0.000478    |\n",
      "|    value_loss           | 0.46         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.64e+04     |\n",
      "|    ep_rew_mean          | 757          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 164          |\n",
      "|    time_elapsed         | 5828         |\n",
      "|    total_timesteps      | 335872       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0036134108 |\n",
      "|    clip_fraction        | 0.0118       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.5         |\n",
      "|    explained_variance   | 0.82         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 46.7         |\n",
      "|    n_updates            | 1630         |\n",
      "|    policy_gradient_loss | 0.00054      |\n",
      "|    value_loss           | 44.5         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.64e+04     |\n",
      "|    ep_rew_mean          | 757          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 165          |\n",
      "|    time_elapsed         | 5864         |\n",
      "|    total_timesteps      | 337920       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0032794666 |\n",
      "|    clip_fraction        | 0.00713      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.46        |\n",
      "|    explained_variance   | 0.957        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 6.67         |\n",
      "|    n_updates            | 1640         |\n",
      "|    policy_gradient_loss | -0.000578    |\n",
      "|    value_loss           | 80.5         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.64e+04     |\n",
      "|    ep_rew_mean          | 757          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 166          |\n",
      "|    time_elapsed         | 5900         |\n",
      "|    total_timesteps      | 339968       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0026769647 |\n",
      "|    clip_fraction        | 0.0128       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.51        |\n",
      "|    explained_variance   | 0.823        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 40.1         |\n",
      "|    n_updates            | 1650         |\n",
      "|    policy_gradient_loss | 0.000223     |\n",
      "|    value_loss           | 40.3         |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.64e+04    |\n",
      "|    ep_rew_mean          | 757         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 167         |\n",
      "|    time_elapsed         | 5936        |\n",
      "|    total_timesteps      | 342016      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009536902 |\n",
      "|    clip_fraction        | 0.00801     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.5        |\n",
      "|    explained_variance   | 0.125       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.165       |\n",
      "|    n_updates            | 1660        |\n",
      "|    policy_gradient_loss | -0.00407    |\n",
      "|    value_loss           | 0.359       |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.63e+04     |\n",
      "|    ep_rew_mean          | 754          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 168          |\n",
      "|    time_elapsed         | 5972         |\n",
      "|    total_timesteps      | 344064       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0020470638 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.45        |\n",
      "|    explained_variance   | 0.161        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0827       |\n",
      "|    n_updates            | 1670         |\n",
      "|    policy_gradient_loss | -0.00119     |\n",
      "|    value_loss           | 0.235        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.63e+04     |\n",
      "|    ep_rew_mean          | 754          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 169          |\n",
      "|    time_elapsed         | 6007         |\n",
      "|    total_timesteps      | 346112       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0061836853 |\n",
      "|    clip_fraction        | 0.0157       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.24        |\n",
      "|    explained_variance   | 0.981        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 25.4         |\n",
      "|    n_updates            | 1680         |\n",
      "|    policy_gradient_loss | -0.00313     |\n",
      "|    value_loss           | 45.6         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.63e+04     |\n",
      "|    ep_rew_mean          | 754          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 170          |\n",
      "|    time_elapsed         | 6043         |\n",
      "|    total_timesteps      | 348160       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0027332269 |\n",
      "|    clip_fraction        | 0.0108       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.33        |\n",
      "|    explained_variance   | 0.756        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 1.44         |\n",
      "|    n_updates            | 1690         |\n",
      "|    policy_gradient_loss | 0.000271     |\n",
      "|    value_loss           | 37.1         |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.63e+04    |\n",
      "|    ep_rew_mean          | 754         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 171         |\n",
      "|    time_elapsed         | 6078        |\n",
      "|    total_timesteps      | 350208      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007290528 |\n",
      "|    clip_fraction        | 0.00195     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.44       |\n",
      "|    explained_variance   | 0.265       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.0778      |\n",
      "|    n_updates            | 1700        |\n",
      "|    policy_gradient_loss | -0.00341    |\n",
      "|    value_loss           | 0.22        |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.6e+04      |\n",
      "|    ep_rew_mean          | 753          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 172          |\n",
      "|    time_elapsed         | 6113         |\n",
      "|    total_timesteps      | 352256       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0018634689 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.42        |\n",
      "|    explained_variance   | 0.172        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0966       |\n",
      "|    n_updates            | 1710         |\n",
      "|    policy_gradient_loss | -0.00121     |\n",
      "|    value_loss           | 0.208        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.6e+04      |\n",
      "|    ep_rew_mean          | 753          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 173          |\n",
      "|    time_elapsed         | 6149         |\n",
      "|    total_timesteps      | 354304       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0061832606 |\n",
      "|    clip_fraction        | 0.0312       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.25        |\n",
      "|    explained_variance   | 0.976        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 39.2         |\n",
      "|    n_updates            | 1720         |\n",
      "|    policy_gradient_loss | -6.16e-05    |\n",
      "|    value_loss           | 41           |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.6e+04      |\n",
      "|    ep_rew_mean          | 753          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 174          |\n",
      "|    time_elapsed         | 6184         |\n",
      "|    total_timesteps      | 356352       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0061740205 |\n",
      "|    clip_fraction        | 0.0125       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.28        |\n",
      "|    explained_variance   | 0.118        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.104        |\n",
      "|    n_updates            | 1730         |\n",
      "|    policy_gradient_loss | -0.00259     |\n",
      "|    value_loss           | 0.514        |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 1.6e+04    |\n",
      "|    ep_rew_mean          | 753        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 57         |\n",
      "|    iterations           | 175        |\n",
      "|    time_elapsed         | 6220       |\n",
      "|    total_timesteps      | 358400     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.00401518 |\n",
      "|    clip_fraction        | 0          |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.25      |\n",
      "|    explained_variance   | 0.0877     |\n",
      "|    learning_rate        | 1e-06      |\n",
      "|    loss                 | 0.124      |\n",
      "|    n_updates            | 1740       |\n",
      "|    policy_gradient_loss | -0.00123   |\n",
      "|    value_loss           | 0.4        |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.57e+04     |\n",
      "|    ep_rew_mean          | 752          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 176          |\n",
      "|    time_elapsed         | 6255         |\n",
      "|    total_timesteps      | 360448       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0035273363 |\n",
      "|    clip_fraction        | 0.00776      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.31        |\n",
      "|    explained_variance   | 0.865        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 23.5         |\n",
      "|    n_updates            | 1750         |\n",
      "|    policy_gradient_loss | -0.000296    |\n",
      "|    value_loss           | 33.4         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.57e+04     |\n",
      "|    ep_rew_mean          | 752          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 177          |\n",
      "|    time_elapsed         | 6289         |\n",
      "|    total_timesteps      | 362496       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0038371952 |\n",
      "|    clip_fraction        | 0.0141       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.22        |\n",
      "|    explained_variance   | 0.919        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 19.2         |\n",
      "|    n_updates            | 1760         |\n",
      "|    policy_gradient_loss | 0.000748     |\n",
      "|    value_loss           | 187          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.57e+04     |\n",
      "|    ep_rew_mean          | 752          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 178          |\n",
      "|    time_elapsed         | 6325         |\n",
      "|    total_timesteps      | 364544       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0032996733 |\n",
      "|    clip_fraction        | 0.00908      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.29        |\n",
      "|    explained_variance   | 0.765        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 32.6         |\n",
      "|    n_updates            | 1770         |\n",
      "|    policy_gradient_loss | -0.00045     |\n",
      "|    value_loss           | 38.8         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.57e+04     |\n",
      "|    ep_rew_mean          | 752          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 179          |\n",
      "|    time_elapsed         | 6361         |\n",
      "|    total_timesteps      | 366592       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0034294557 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.35        |\n",
      "|    explained_variance   | 0.205        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0602       |\n",
      "|    n_updates            | 1780         |\n",
      "|    policy_gradient_loss | -0.000446    |\n",
      "|    value_loss           | 0.186        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.57e+04     |\n",
      "|    ep_rew_mean          | 752          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 180          |\n",
      "|    time_elapsed         | 6397         |\n",
      "|    total_timesteps      | 368640       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012683645 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.35        |\n",
      "|    explained_variance   | 0.00409      |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0918       |\n",
      "|    n_updates            | 1790         |\n",
      "|    policy_gradient_loss | -0.000584    |\n",
      "|    value_loss           | 0.182        |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.57e+04    |\n",
      "|    ep_rew_mean          | 752         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 181         |\n",
      "|    time_elapsed         | 6433        |\n",
      "|    total_timesteps      | 370688      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007053798 |\n",
      "|    clip_fraction        | 0.0224      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.19       |\n",
      "|    explained_variance   | 0.947       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 7.28        |\n",
      "|    n_updates            | 1800        |\n",
      "|    policy_gradient_loss | 0.00198     |\n",
      "|    value_loss           | 63.3        |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.57e+04     |\n",
      "|    ep_rew_mean          | 752          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 182          |\n",
      "|    time_elapsed         | 6467         |\n",
      "|    total_timesteps      | 372736       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0026804032 |\n",
      "|    clip_fraction        | 0.00762      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.23        |\n",
      "|    explained_variance   | 0.845        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 7.15         |\n",
      "|    n_updates            | 1810         |\n",
      "|    policy_gradient_loss | 0.00141      |\n",
      "|    value_loss           | 31.7         |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.57e+04    |\n",
      "|    ep_rew_mean          | 752         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 183         |\n",
      "|    time_elapsed         | 6503        |\n",
      "|    total_timesteps      | 374784      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004693878 |\n",
      "|    clip_fraction        | 0.0525      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.36       |\n",
      "|    explained_variance   | 0.178       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.389       |\n",
      "|    n_updates            | 1820        |\n",
      "|    policy_gradient_loss | -0.00501    |\n",
      "|    value_loss           | 0.377       |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.57e+04     |\n",
      "|    ep_rew_mean          | 752          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 184          |\n",
      "|    time_elapsed         | 6539         |\n",
      "|    total_timesteps      | 376832       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0030330464 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.33        |\n",
      "|    explained_variance   | 0.156        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0835       |\n",
      "|    n_updates            | 1830         |\n",
      "|    policy_gradient_loss | -0.00147     |\n",
      "|    value_loss           | 0.267        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.57e+04     |\n",
      "|    ep_rew_mean          | 752          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 185          |\n",
      "|    time_elapsed         | 6575         |\n",
      "|    total_timesteps      | 378880       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0066042906 |\n",
      "|    clip_fraction        | 0.0144       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.15        |\n",
      "|    explained_variance   | 0.932        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 76.8         |\n",
      "|    n_updates            | 1840         |\n",
      "|    policy_gradient_loss | 0.00115      |\n",
      "|    value_loss           | 107          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.57e+04     |\n",
      "|    ep_rew_mean          | 752          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 186          |\n",
      "|    time_elapsed         | 6612         |\n",
      "|    total_timesteps      | 380928       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012302266 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.18        |\n",
      "|    explained_variance   | 0.916        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.138        |\n",
      "|    n_updates            | 1850         |\n",
      "|    policy_gradient_loss | -0.00106     |\n",
      "|    value_loss           | 0.641        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.57e+04     |\n",
      "|    ep_rew_mean          | 752          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 187          |\n",
      "|    time_elapsed         | 6647         |\n",
      "|    total_timesteps      | 382976       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010993232 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.18        |\n",
      "|    explained_variance   | 0.0275       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.115        |\n",
      "|    n_updates            | 1860         |\n",
      "|    policy_gradient_loss | -0.000638    |\n",
      "|    value_loss           | 0.214        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.6e+04       |\n",
      "|    ep_rew_mean          | 753           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 188           |\n",
      "|    time_elapsed         | 6683          |\n",
      "|    total_timesteps      | 385024        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00078054506 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.2          |\n",
      "|    explained_variance   | 0.0875        |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.0824        |\n",
      "|    n_updates            | 1870          |\n",
      "|    policy_gradient_loss | -0.000724     |\n",
      "|    value_loss           | 0.221         |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.6e+04       |\n",
      "|    ep_rew_mean          | 753           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 189           |\n",
      "|    time_elapsed         | 6718          |\n",
      "|    total_timesteps      | 387072        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00045679364 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.11         |\n",
      "|    explained_variance   | 0.978         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 23.9          |\n",
      "|    n_updates            | 1880          |\n",
      "|    policy_gradient_loss | -0.000728     |\n",
      "|    value_loss           | 34.8          |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.55e+04      |\n",
      "|    ep_rew_mean          | 792           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 190           |\n",
      "|    time_elapsed         | 6754          |\n",
      "|    total_timesteps      | 389120        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00019979905 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.11         |\n",
      "|    explained_variance   | 0.865         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 126           |\n",
      "|    n_updates            | 1890          |\n",
      "|    policy_gradient_loss | -0.00101      |\n",
      "|    value_loss           | 168           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.55e+04      |\n",
      "|    ep_rew_mean          | 792           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 191           |\n",
      "|    time_elapsed         | 6790          |\n",
      "|    total_timesteps      | 391168        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00016746618 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.997        |\n",
      "|    explained_variance   | 0.931         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 98.6          |\n",
      "|    n_updates            | 1900          |\n",
      "|    policy_gradient_loss | -0.00024      |\n",
      "|    value_loss           | 262           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.55e+04     |\n",
      "|    ep_rew_mean          | 792          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 192          |\n",
      "|    time_elapsed         | 6826         |\n",
      "|    total_timesteps      | 393216       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0052592847 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.2         |\n",
      "|    explained_variance   | -0.88        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.134        |\n",
      "|    n_updates            | 1910         |\n",
      "|    policy_gradient_loss | -0.000851    |\n",
      "|    value_loss           | 0.5          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.55e+04    |\n",
      "|    ep_rew_mean          | 792         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 193         |\n",
      "|    time_elapsed         | 6862        |\n",
      "|    total_timesteps      | 395264      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.003181254 |\n",
      "|    clip_fraction        | 0.0195      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.31       |\n",
      "|    explained_variance   | -0.836      |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.148       |\n",
      "|    n_updates            | 1920        |\n",
      "|    policy_gradient_loss | -0.00182    |\n",
      "|    value_loss           | 0.444       |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.55e+04    |\n",
      "|    ep_rew_mean          | 792         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 194         |\n",
      "|    time_elapsed         | 6897        |\n",
      "|    total_timesteps      | 397312      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006107577 |\n",
      "|    clip_fraction        | 0.000488    |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.34       |\n",
      "|    explained_variance   | -0.64       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.127       |\n",
      "|    n_updates            | 1930        |\n",
      "|    policy_gradient_loss | -0.00258    |\n",
      "|    value_loss           | 0.348       |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.55e+04      |\n",
      "|    ep_rew_mean          | 792           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 195           |\n",
      "|    time_elapsed         | 6933          |\n",
      "|    total_timesteps      | 399360        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00035908355 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.25         |\n",
      "|    explained_variance   | 0.942         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 55.6          |\n",
      "|    n_updates            | 1940          |\n",
      "|    policy_gradient_loss | 8.11e-05      |\n",
      "|    value_loss           | 197           |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.55e+04    |\n",
      "|    ep_rew_mean          | 792         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 196         |\n",
      "|    time_elapsed         | 6969        |\n",
      "|    total_timesteps      | 401408      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011193061 |\n",
      "|    clip_fraction        | 0.0937      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.46       |\n",
      "|    explained_variance   | -0.442      |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.279       |\n",
      "|    n_updates            | 1950        |\n",
      "|    policy_gradient_loss | -0.0089     |\n",
      "|    value_loss           | 0.525       |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.55e+04     |\n",
      "|    ep_rew_mean          | 792          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 197          |\n",
      "|    time_elapsed         | 7003         |\n",
      "|    total_timesteps      | 403456       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0021813628 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.48        |\n",
      "|    explained_variance   | -0.566       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.204        |\n",
      "|    n_updates            | 1960         |\n",
      "|    policy_gradient_loss | -0.00219     |\n",
      "|    value_loss           | 0.427        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.55e+04     |\n",
      "|    ep_rew_mean          | 805          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 198          |\n",
      "|    time_elapsed         | 7038         |\n",
      "|    total_timesteps      | 405504       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0019157476 |\n",
      "|    clip_fraction        | 0.000928     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.5         |\n",
      "|    explained_variance   | -0.585       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.159        |\n",
      "|    n_updates            | 1970         |\n",
      "|    policy_gradient_loss | -0.000913    |\n",
      "|    value_loss           | 0.293        |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.55e+04    |\n",
      "|    ep_rew_mean          | 805         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 199         |\n",
      "|    time_elapsed         | 7074        |\n",
      "|    total_timesteps      | 407552      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.003934948 |\n",
      "|    clip_fraction        | 0.0147      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.39       |\n",
      "|    explained_variance   | 0.971       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 31.7        |\n",
      "|    n_updates            | 1980        |\n",
      "|    policy_gradient_loss | 0.00173     |\n",
      "|    value_loss           | 24          |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.55e+04      |\n",
      "|    ep_rew_mean          | 805           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 200           |\n",
      "|    time_elapsed         | 7110          |\n",
      "|    total_timesteps      | 409600        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00043506135 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.36         |\n",
      "|    explained_variance   | 0.966         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 34            |\n",
      "|    n_updates            | 1990          |\n",
      "|    policy_gradient_loss | -0.000451     |\n",
      "|    value_loss           | 59.6          |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.55e+04     |\n",
      "|    ep_rew_mean          | 805          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 201          |\n",
      "|    time_elapsed         | 7146         |\n",
      "|    total_timesteps      | 411648       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0022939653 |\n",
      "|    clip_fraction        | 0.00576      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.45        |\n",
      "|    explained_variance   | 0.716        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 66.4         |\n",
      "|    n_updates            | 2000         |\n",
      "|    policy_gradient_loss | 0.000783     |\n",
      "|    value_loss           | 48.1         |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.55e+04    |\n",
      "|    ep_rew_mean          | 805         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 202         |\n",
      "|    time_elapsed         | 7182        |\n",
      "|    total_timesteps      | 413696      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011472557 |\n",
      "|    clip_fraction        | 0.0409      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.43       |\n",
      "|    explained_variance   | -0.289      |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.0935      |\n",
      "|    n_updates            | 2010        |\n",
      "|    policy_gradient_loss | -0.00516    |\n",
      "|    value_loss           | 0.26        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.55e+04    |\n",
      "|    ep_rew_mean          | 805         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 203         |\n",
      "|    time_elapsed         | 7217        |\n",
      "|    total_timesteps      | 415744      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.010067288 |\n",
      "|    clip_fraction        | 0.0156      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.34       |\n",
      "|    explained_variance   | -0.204      |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.105       |\n",
      "|    n_updates            | 2020        |\n",
      "|    policy_gradient_loss | -0.00502    |\n",
      "|    value_loss           | 0.319       |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.55e+04      |\n",
      "|    ep_rew_mean          | 822           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 204           |\n",
      "|    time_elapsed         | 7252          |\n",
      "|    total_timesteps      | 417792        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00087677327 |\n",
      "|    clip_fraction        | 0.0019        |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.2          |\n",
      "|    explained_variance   | 0.958         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 15.5          |\n",
      "|    n_updates            | 2030          |\n",
      "|    policy_gradient_loss | 0.000906      |\n",
      "|    value_loss           | 79.7          |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.55e+04     |\n",
      "|    ep_rew_mean          | 822          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 205          |\n",
      "|    time_elapsed         | 7288         |\n",
      "|    total_timesteps      | 419840       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0009466311 |\n",
      "|    clip_fraction        | 0.0019       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.15        |\n",
      "|    explained_variance   | 0.96         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 45.5         |\n",
      "|    n_updates            | 2040         |\n",
      "|    policy_gradient_loss | 0.000722     |\n",
      "|    value_loss           | 74.3         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.55e+04     |\n",
      "|    ep_rew_mean          | 822          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 206          |\n",
      "|    time_elapsed         | 7325         |\n",
      "|    total_timesteps      | 421888       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005559979 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.16        |\n",
      "|    explained_variance   | 0.954        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 82.6         |\n",
      "|    n_updates            | 2050         |\n",
      "|    policy_gradient_loss | -0.00133     |\n",
      "|    value_loss           | 115          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.55e+04     |\n",
      "|    ep_rew_mean          | 822          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 207          |\n",
      "|    time_elapsed         | 7359         |\n",
      "|    total_timesteps      | 423936       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0004924562 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.21        |\n",
      "|    explained_variance   | -0.538       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.145        |\n",
      "|    n_updates            | 2060         |\n",
      "|    policy_gradient_loss | 0.000183     |\n",
      "|    value_loss           | 0.331        |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.55e+04    |\n",
      "|    ep_rew_mean          | 822         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 208         |\n",
      "|    time_elapsed         | 7394        |\n",
      "|    total_timesteps      | 425984      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.014032599 |\n",
      "|    clip_fraction        | 0.0344      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.15       |\n",
      "|    explained_variance   | -0.419      |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.193       |\n",
      "|    n_updates            | 2070        |\n",
      "|    policy_gradient_loss | -0.00544    |\n",
      "|    value_loss           | 0.296       |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.52e+04     |\n",
      "|    ep_rew_mean          | 832          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 209          |\n",
      "|    time_elapsed         | 7429         |\n",
      "|    total_timesteps      | 428032       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0038653067 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.04        |\n",
      "|    explained_variance   | -0.652       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0976       |\n",
      "|    n_updates            | 2080         |\n",
      "|    policy_gradient_loss | -0.0014      |\n",
      "|    value_loss           | 0.351        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.52e+04      |\n",
      "|    ep_rew_mean          | 832           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 210           |\n",
      "|    time_elapsed         | 7465          |\n",
      "|    total_timesteps      | 430080        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00016877614 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.823        |\n",
      "|    explained_variance   | 0.927         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 27.3          |\n",
      "|    n_updates            | 2090          |\n",
      "|    policy_gradient_loss | -0.000252     |\n",
      "|    value_loss           | 356           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.52e+04     |\n",
      "|    ep_rew_mean          | 832          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 211          |\n",
      "|    time_elapsed         | 7501         |\n",
      "|    total_timesteps      | 432128       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0029427595 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.98        |\n",
      "|    explained_variance   | -0.0213      |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.18         |\n",
      "|    n_updates            | 2100         |\n",
      "|    policy_gradient_loss | -0.00105     |\n",
      "|    value_loss           | 0.322        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.52e+04     |\n",
      "|    ep_rew_mean          | 832          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 212          |\n",
      "|    time_elapsed         | 7536         |\n",
      "|    total_timesteps      | 434176       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0035402311 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.917       |\n",
      "|    explained_variance   | -0.135       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.162        |\n",
      "|    n_updates            | 2110         |\n",
      "|    policy_gradient_loss | -0.00171     |\n",
      "|    value_loss           | 0.332        |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.5e+04     |\n",
      "|    ep_rew_mean          | 867         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 213         |\n",
      "|    time_elapsed         | 7571        |\n",
      "|    total_timesteps      | 436224      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.002290401 |\n",
      "|    clip_fraction        | 0.00923     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.921      |\n",
      "|    explained_variance   | -0.117      |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.128       |\n",
      "|    n_updates            | 2120        |\n",
      "|    policy_gradient_loss | -0.00127    |\n",
      "|    value_loss           | 0.288       |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.46e+04      |\n",
      "|    ep_rew_mean          | 914           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 214           |\n",
      "|    time_elapsed         | 7607          |\n",
      "|    total_timesteps      | 438272        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00014794117 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.749        |\n",
      "|    explained_variance   | 0.91          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 55.8          |\n",
      "|    n_updates            | 2130          |\n",
      "|    policy_gradient_loss | 0.000149      |\n",
      "|    value_loss           | 387           |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.42e+04    |\n",
      "|    ep_rew_mean          | 935         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 215         |\n",
      "|    time_elapsed         | 7644        |\n",
      "|    total_timesteps      | 440320      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001195909 |\n",
      "|    clip_fraction        | 0.00415     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.647      |\n",
      "|    explained_variance   | 0.837       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 164         |\n",
      "|    n_updates            | 2140        |\n",
      "|    policy_gradient_loss | 0.000438    |\n",
      "|    value_loss           | 678         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.34e+04     |\n",
      "|    ep_rew_mean          | 991          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 216          |\n",
      "|    time_elapsed         | 7677         |\n",
      "|    total_timesteps      | 442368       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006744405 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.833       |\n",
      "|    explained_variance   | 0.933        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 71.8         |\n",
      "|    n_updates            | 2150         |\n",
      "|    policy_gradient_loss | -0.0008      |\n",
      "|    value_loss           | 291          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.3e+04      |\n",
      "|    ep_rew_mean          | 1.01e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 217          |\n",
      "|    time_elapsed         | 7713         |\n",
      "|    total_timesteps      | 444416       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006301093 |\n",
      "|    clip_fraction        | 0.0021       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.57        |\n",
      "|    explained_variance   | 0.592        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 236          |\n",
      "|    n_updates            | 2160         |\n",
      "|    policy_gradient_loss | -0.000224    |\n",
      "|    value_loss           | 773          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.3e+04      |\n",
      "|    ep_rew_mean          | 1.01e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 218          |\n",
      "|    time_elapsed         | 7748         |\n",
      "|    total_timesteps      | 446464       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006056584 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.805       |\n",
      "|    explained_variance   | 0.882        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 227          |\n",
      "|    n_updates            | 2170         |\n",
      "|    policy_gradient_loss | -0.000932    |\n",
      "|    value_loss           | 366          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.3e+04      |\n",
      "|    ep_rew_mean          | 1.01e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 219          |\n",
      "|    time_elapsed         | 7784         |\n",
      "|    total_timesteps      | 448512       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010836397 |\n",
      "|    clip_fraction        | 0.000879     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.867       |\n",
      "|    explained_variance   | 0.95         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 33.3         |\n",
      "|    n_updates            | 2180         |\n",
      "|    policy_gradient_loss | -0.000671    |\n",
      "|    value_loss           | 123          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.3e+04      |\n",
      "|    ep_rew_mean          | 1.01e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 220          |\n",
      "|    time_elapsed         | 7820         |\n",
      "|    total_timesteps      | 450560       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0047058277 |\n",
      "|    clip_fraction        | 0.0448       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.09        |\n",
      "|    explained_variance   | 0.862        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 13.6         |\n",
      "|    n_updates            | 2190         |\n",
      "|    policy_gradient_loss | -0.00045     |\n",
      "|    value_loss           | 19.4         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.29e+04     |\n",
      "|    ep_rew_mean          | 1.02e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 221          |\n",
      "|    time_elapsed         | 7855         |\n",
      "|    total_timesteps      | 452608       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0092888065 |\n",
      "|    clip_fraction        | 0.0323       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.06        |\n",
      "|    explained_variance   | -0.116       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.151        |\n",
      "|    n_updates            | 2200         |\n",
      "|    policy_gradient_loss | -0.00453     |\n",
      "|    value_loss           | 0.896        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.26e+04      |\n",
      "|    ep_rew_mean          | 1.04e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 222           |\n",
      "|    time_elapsed         | 7890          |\n",
      "|    total_timesteps      | 454656        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00016442395 |\n",
      "|    clip_fraction        | 4.88e-05      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.769        |\n",
      "|    explained_variance   | 0.917         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 60.9          |\n",
      "|    n_updates            | 2210          |\n",
      "|    policy_gradient_loss | -1.07e-05     |\n",
      "|    value_loss           | 301           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.23e+04      |\n",
      "|    ep_rew_mean          | 1.07e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 223           |\n",
      "|    time_elapsed         | 7926          |\n",
      "|    total_timesteps      | 456704        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00031413315 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.664        |\n",
      "|    explained_variance   | 0.925         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 69.6          |\n",
      "|    n_updates            | 2220          |\n",
      "|    policy_gradient_loss | -0.000389     |\n",
      "|    value_loss           | 299           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.2e+04       |\n",
      "|    ep_rew_mean          | 1.09e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 224           |\n",
      "|    time_elapsed         | 7961          |\n",
      "|    total_timesteps      | 458752        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00020449766 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.507        |\n",
      "|    explained_variance   | 0.906         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 175           |\n",
      "|    n_updates            | 2230          |\n",
      "|    policy_gradient_loss | -0.000446     |\n",
      "|    value_loss           | 341           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.15e+04     |\n",
      "|    ep_rew_mean          | 1.13e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 225          |\n",
      "|    time_elapsed         | 7998         |\n",
      "|    total_timesteps      | 460800       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0003638144 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.501       |\n",
      "|    explained_variance   | 0.852        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 256          |\n",
      "|    n_updates            | 2240         |\n",
      "|    policy_gradient_loss | 4.6e-05      |\n",
      "|    value_loss           | 522          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+04      |\n",
      "|    ep_rew_mean          | 1.17e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 226          |\n",
      "|    time_elapsed         | 8031         |\n",
      "|    total_timesteps      | 462848       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010357298 |\n",
      "|    clip_fraction        | 0.00259      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.55        |\n",
      "|    explained_variance   | 0.949        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 48           |\n",
      "|    n_updates            | 2250         |\n",
      "|    policy_gradient_loss | -0.000189    |\n",
      "|    value_loss           | 268          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.05e+04      |\n",
      "|    ep_rew_mean          | 1.16e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 227           |\n",
      "|    time_elapsed         | 8067          |\n",
      "|    total_timesteps      | 464896        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00029980027 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.421        |\n",
      "|    explained_variance   | 0.894         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 113           |\n",
      "|    n_updates            | 2260          |\n",
      "|    policy_gradient_loss | -0.000141     |\n",
      "|    value_loss           | 471           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 9.93e+03     |\n",
      "|    ep_rew_mean          | 1.19e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 228          |\n",
      "|    time_elapsed         | 8102         |\n",
      "|    total_timesteps      | 466944       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 8.220982e-05 |\n",
      "|    clip_fraction        | 4.88e-05     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.351       |\n",
      "|    explained_variance   | 0.847        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 75.1         |\n",
      "|    n_updates            | 2270         |\n",
      "|    policy_gradient_loss | -0.000144    |\n",
      "|    value_loss           | 542          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 9.75e+03      |\n",
      "|    ep_rew_mean          | 1.2e+03       |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 229           |\n",
      "|    time_elapsed         | 8138          |\n",
      "|    total_timesteps      | 468992        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00031620977 |\n",
      "|    clip_fraction        | 0.00181       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.283        |\n",
      "|    explained_variance   | 0.616         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 354           |\n",
      "|    n_updates            | 2280          |\n",
      "|    policy_gradient_loss | 0.000602      |\n",
      "|    value_loss           | 573           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 9.41e+03      |\n",
      "|    ep_rew_mean          | 1.23e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 230           |\n",
      "|    time_elapsed         | 8173          |\n",
      "|    total_timesteps      | 471040        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00083048904 |\n",
      "|    clip_fraction        | 0.00142       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.431        |\n",
      "|    explained_variance   | 0.895         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 349           |\n",
      "|    n_updates            | 2290          |\n",
      "|    policy_gradient_loss | -0.000764     |\n",
      "|    value_loss           | 334           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 9.41e+03      |\n",
      "|    ep_rew_mean          | 1.23e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 231           |\n",
      "|    time_elapsed         | 8209          |\n",
      "|    total_timesteps      | 473088        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 9.6195814e-05 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.394        |\n",
      "|    explained_variance   | 0.849         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 130           |\n",
      "|    n_updates            | 2300          |\n",
      "|    policy_gradient_loss | -0.000331     |\n",
      "|    value_loss           | 471           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 9.12e+03      |\n",
      "|    ep_rew_mean          | 1.27e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 232           |\n",
      "|    time_elapsed         | 8244          |\n",
      "|    total_timesteps      | 475136        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00038531033 |\n",
      "|    clip_fraction        | 0.000146      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.472        |\n",
      "|    explained_variance   | 0.938         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 125           |\n",
      "|    n_updates            | 2310          |\n",
      "|    policy_gradient_loss | -0.000323     |\n",
      "|    value_loss           | 208           |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 8.83e+03    |\n",
      "|    ep_rew_mean          | 1.3e+03     |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 233         |\n",
      "|    time_elapsed         | 8280        |\n",
      "|    total_timesteps      | 477184      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.002295668 |\n",
      "|    clip_fraction        | 0.00908     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.371      |\n",
      "|    explained_variance   | 0.917       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 88.6        |\n",
      "|    n_updates            | 2320        |\n",
      "|    policy_gradient_loss | -0.00108    |\n",
      "|    value_loss           | 338         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 8.68e+03     |\n",
      "|    ep_rew_mean          | 1.3e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 234          |\n",
      "|    time_elapsed         | 8315         |\n",
      "|    total_timesteps      | 479232       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008567581 |\n",
      "|    clip_fraction        | 0.00308      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.289       |\n",
      "|    explained_variance   | 0.663        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 196          |\n",
      "|    n_updates            | 2330         |\n",
      "|    policy_gradient_loss | -0.0004      |\n",
      "|    value_loss           | 530          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 8.43e+03     |\n",
      "|    ep_rew_mean          | 1.34e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 235          |\n",
      "|    time_elapsed         | 8352         |\n",
      "|    total_timesteps      | 481280       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005887301 |\n",
      "|    clip_fraction        | 0.000586     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.313       |\n",
      "|    explained_variance   | 0.692        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 320          |\n",
      "|    n_updates            | 2340         |\n",
      "|    policy_gradient_loss | 0.000226     |\n",
      "|    value_loss           | 465          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 8.04e+03      |\n",
      "|    ep_rew_mean          | 1.32e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 236           |\n",
      "|    time_elapsed         | 8387          |\n",
      "|    total_timesteps      | 483328        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00048865285 |\n",
      "|    clip_fraction        | 0.000195      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.378        |\n",
      "|    explained_variance   | 0.899         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 395           |\n",
      "|    n_updates            | 2350          |\n",
      "|    policy_gradient_loss | 0.000291      |\n",
      "|    value_loss           | 356           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 7.82e+03     |\n",
      "|    ep_rew_mean          | 1.33e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 237          |\n",
      "|    time_elapsed         | 8422         |\n",
      "|    total_timesteps      | 485376       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011922178 |\n",
      "|    clip_fraction        | 0.0108       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.344       |\n",
      "|    explained_variance   | 0.841        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 219          |\n",
      "|    n_updates            | 2360         |\n",
      "|    policy_gradient_loss | -0.00163     |\n",
      "|    value_loss           | 701          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 7.6e+03       |\n",
      "|    ep_rew_mean          | 1.33e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 238           |\n",
      "|    time_elapsed         | 8458          |\n",
      "|    total_timesteps      | 487424        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00045483783 |\n",
      "|    clip_fraction        | 0.00107       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.334        |\n",
      "|    explained_variance   | 0.865         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 160           |\n",
      "|    n_updates            | 2370          |\n",
      "|    policy_gradient_loss | -0.000752     |\n",
      "|    value_loss           | 443           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 7.4e+03      |\n",
      "|    ep_rew_mean          | 1.35e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 239          |\n",
      "|    time_elapsed         | 8493         |\n",
      "|    total_timesteps      | 489472       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012079342 |\n",
      "|    clip_fraction        | 0.00708      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.282       |\n",
      "|    explained_variance   | 0.721        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 442          |\n",
      "|    n_updates            | 2380         |\n",
      "|    policy_gradient_loss | -0.000392    |\n",
      "|    value_loss           | 579          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 7.12e+03      |\n",
      "|    ep_rew_mean          | 1.34e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 240           |\n",
      "|    time_elapsed         | 8529          |\n",
      "|    total_timesteps      | 491520        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00053308846 |\n",
      "|    clip_fraction        | 0.000732      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.329        |\n",
      "|    explained_variance   | 0.807         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 544           |\n",
      "|    n_updates            | 2390          |\n",
      "|    policy_gradient_loss | -0.000576     |\n",
      "|    value_loss           | 451           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 7.04e+03      |\n",
      "|    ep_rew_mean          | 1.36e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 241           |\n",
      "|    time_elapsed         | 8564          |\n",
      "|    total_timesteps      | 493568        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00019475786 |\n",
      "|    clip_fraction        | 0.000195      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.348        |\n",
      "|    explained_variance   | 0.742         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 254           |\n",
      "|    n_updates            | 2400          |\n",
      "|    policy_gradient_loss | -6.61e-05     |\n",
      "|    value_loss           | 671           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 6.79e+03     |\n",
      "|    ep_rew_mean          | 1.36e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 242          |\n",
      "|    time_elapsed         | 8600         |\n",
      "|    total_timesteps      | 495616       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005867147 |\n",
      "|    clip_fraction        | 0.00107      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.44        |\n",
      "|    explained_variance   | 0.808        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 466          |\n",
      "|    n_updates            | 2410         |\n",
      "|    policy_gradient_loss | -0.00118     |\n",
      "|    value_loss           | 505          |\n",
      "------------------------------------------\n",
      "--------------------------------------------\n",
      "| rollout/                |                |\n",
      "|    ep_len_mean          | 6.72e+03       |\n",
      "|    ep_rew_mean          | 1.38e+03       |\n",
      "| time/                   |                |\n",
      "|    fps                  | 57             |\n",
      "|    iterations           | 243            |\n",
      "|    time_elapsed         | 8635           |\n",
      "|    total_timesteps      | 497664         |\n",
      "| train/                  |                |\n",
      "|    approx_kl            | 0.000105721905 |\n",
      "|    clip_fraction        | 0              |\n",
      "|    clip_range           | 0.2            |\n",
      "|    entropy_loss         | -0.399         |\n",
      "|    explained_variance   | 0.821          |\n",
      "|    learning_rate        | 1e-06          |\n",
      "|    loss                 | 168            |\n",
      "|    n_updates            | 2420           |\n",
      "|    policy_gradient_loss | -4.21e-05      |\n",
      "|    value_loss           | 521            |\n",
      "--------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 6.65e+03     |\n",
      "|    ep_rew_mean          | 1.39e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 244          |\n",
      "|    time_elapsed         | 8671         |\n",
      "|    total_timesteps      | 499712       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0017295578 |\n",
      "|    clip_fraction        | 0.0229       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.491       |\n",
      "|    explained_variance   | 0.83         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 215          |\n",
      "|    n_updates            | 2430         |\n",
      "|    policy_gradient_loss | -0.00182     |\n",
      "|    value_loss           | 487          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 6.51e+03      |\n",
      "|    ep_rew_mean          | 1.39e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 245           |\n",
      "|    time_elapsed         | 8707          |\n",
      "|    total_timesteps      | 501760        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00028302602 |\n",
      "|    clip_fraction        | 0.000781      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.488        |\n",
      "|    explained_variance   | 0.934         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 169           |\n",
      "|    n_updates            | 2440          |\n",
      "|    policy_gradient_loss | -0.000124     |\n",
      "|    value_loss           | 227           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 6.37e+03     |\n",
      "|    ep_rew_mean          | 1.4e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 246          |\n",
      "|    time_elapsed         | 8741         |\n",
      "|    total_timesteps      | 503808       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007943987 |\n",
      "|    clip_fraction        | 0.000879     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.5         |\n",
      "|    explained_variance   | 0.908        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 488          |\n",
      "|    n_updates            | 2450         |\n",
      "|    policy_gradient_loss | -0.000233    |\n",
      "|    value_loss           | 468          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 6.3e+03      |\n",
      "|    ep_rew_mean          | 1.4e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 247          |\n",
      "|    time_elapsed         | 8776         |\n",
      "|    total_timesteps      | 505856       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011353295 |\n",
      "|    clip_fraction        | 0.0145       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.396       |\n",
      "|    explained_variance   | 0.812        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 183          |\n",
      "|    n_updates            | 2460         |\n",
      "|    policy_gradient_loss | -0.00146     |\n",
      "|    value_loss           | 577          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 6.25e+03     |\n",
      "|    ep_rew_mean          | 1.4e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 248          |\n",
      "|    time_elapsed         | 8812         |\n",
      "|    total_timesteps      | 507904       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0016030874 |\n",
      "|    clip_fraction        | 0.00674      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.446       |\n",
      "|    explained_variance   | 0.857        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 256          |\n",
      "|    n_updates            | 2470         |\n",
      "|    policy_gradient_loss | -0.00103     |\n",
      "|    value_loss           | 432          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 6.07e+03     |\n",
      "|    ep_rew_mean          | 1.43e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 249          |\n",
      "|    time_elapsed         | 8847         |\n",
      "|    total_timesteps      | 509952       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0022187093 |\n",
      "|    clip_fraction        | 0.0102       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.394       |\n",
      "|    explained_variance   | 0.837        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 181          |\n",
      "|    n_updates            | 2480         |\n",
      "|    policy_gradient_loss | -0.00102     |\n",
      "|    value_loss           | 495          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 6.01e+03     |\n",
      "|    ep_rew_mean          | 1.43e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 250          |\n",
      "|    time_elapsed         | 8882         |\n",
      "|    total_timesteps      | 512000       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 7.346721e-05 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.262       |\n",
      "|    explained_variance   | 0.651        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 143          |\n",
      "|    n_updates            | 2490         |\n",
      "|    policy_gradient_loss | 0.000274     |\n",
      "|    value_loss           | 690          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 5.83e+03     |\n",
      "|    ep_rew_mean          | 1.43e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 251          |\n",
      "|    time_elapsed         | 8917         |\n",
      "|    total_timesteps      | 514048       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0016216717 |\n",
      "|    clip_fraction        | 0.00957      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.301       |\n",
      "|    explained_variance   | 0.656        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 92.7         |\n",
      "|    n_updates            | 2500         |\n",
      "|    policy_gradient_loss | -0.000881    |\n",
      "|    value_loss           | 459          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 5.79e+03     |\n",
      "|    ep_rew_mean          | 1.44e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 252          |\n",
      "|    time_elapsed         | 8953         |\n",
      "|    total_timesteps      | 516096       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0015140385 |\n",
      "|    clip_fraction        | 0.0158       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.389       |\n",
      "|    explained_variance   | 0.867        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 106          |\n",
      "|    n_updates            | 2510         |\n",
      "|    policy_gradient_loss | -0.00104     |\n",
      "|    value_loss           | 467          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 5.76e+03     |\n",
      "|    ep_rew_mean          | 1.44e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 253          |\n",
      "|    time_elapsed         | 8988         |\n",
      "|    total_timesteps      | 518144       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0018751731 |\n",
      "|    clip_fraction        | 0.00356      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.538       |\n",
      "|    explained_variance   | 0.937        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 84.3         |\n",
      "|    n_updates            | 2520         |\n",
      "|    policy_gradient_loss | -0.000448    |\n",
      "|    value_loss           | 262          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 5.71e+03     |\n",
      "|    ep_rew_mean          | 1.45e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 254          |\n",
      "|    time_elapsed         | 9025         |\n",
      "|    total_timesteps      | 520192       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0040076114 |\n",
      "|    clip_fraction        | 0.0202       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.607       |\n",
      "|    explained_variance   | 0.872        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 242          |\n",
      "|    n_updates            | 2530         |\n",
      "|    policy_gradient_loss | -0.000814    |\n",
      "|    value_loss           | 411          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 5.61e+03    |\n",
      "|    ep_rew_mean          | 1.46e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 255         |\n",
      "|    time_elapsed         | 9059        |\n",
      "|    total_timesteps      | 522240      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.000974944 |\n",
      "|    clip_fraction        | 0.00327     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.308      |\n",
      "|    explained_variance   | 0.838       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 236         |\n",
      "|    n_updates            | 2540        |\n",
      "|    policy_gradient_loss | -0.00123    |\n",
      "|    value_loss           | 432         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 5.46e+03     |\n",
      "|    ep_rew_mean          | 1.45e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 256          |\n",
      "|    time_elapsed         | 9095         |\n",
      "|    total_timesteps      | 524288       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006867836 |\n",
      "|    clip_fraction        | 0.00215      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.289       |\n",
      "|    explained_variance   | 0.806        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 669          |\n",
      "|    n_updates            | 2550         |\n",
      "|    policy_gradient_loss | -0.000574    |\n",
      "|    value_loss           | 547          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 5.32e+03      |\n",
      "|    ep_rew_mean          | 1.45e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 257           |\n",
      "|    time_elapsed         | 9130          |\n",
      "|    total_timesteps      | 526336        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00017173804 |\n",
      "|    clip_fraction        | 0.000146      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.234        |\n",
      "|    explained_variance   | 0.807         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 381           |\n",
      "|    n_updates            | 2560          |\n",
      "|    policy_gradient_loss | -0.000164     |\n",
      "|    value_loss           | 463           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 5.28e+03     |\n",
      "|    ep_rew_mean          | 1.45e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 258          |\n",
      "|    time_elapsed         | 9166         |\n",
      "|    total_timesteps      | 528384       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005558757 |\n",
      "|    clip_fraction        | 0.00645      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.232       |\n",
      "|    explained_variance   | 0.71         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 380          |\n",
      "|    n_updates            | 2570         |\n",
      "|    policy_gradient_loss | -0.000659    |\n",
      "|    value_loss           | 635          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 4.98e+03     |\n",
      "|    ep_rew_mean          | 1.48e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 259          |\n",
      "|    time_elapsed         | 9201         |\n",
      "|    total_timesteps      | 530432       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0015373096 |\n",
      "|    clip_fraction        | 0.00957      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.342       |\n",
      "|    explained_variance   | 0.937        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 59.6         |\n",
      "|    n_updates            | 2580         |\n",
      "|    policy_gradient_loss | -0.0011      |\n",
      "|    value_loss           | 234          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 4.77e+03     |\n",
      "|    ep_rew_mean          | 1.49e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 260          |\n",
      "|    time_elapsed         | 9235         |\n",
      "|    total_timesteps      | 532480       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0004018489 |\n",
      "|    clip_fraction        | 0.00176      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.236       |\n",
      "|    explained_variance   | 0.803        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 328          |\n",
      "|    n_updates            | 2590         |\n",
      "|    policy_gradient_loss | -0.000668    |\n",
      "|    value_loss           | 473          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 4.77e+03      |\n",
      "|    ep_rew_mean          | 1.49e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 261           |\n",
      "|    time_elapsed         | 9271          |\n",
      "|    total_timesteps      | 534528        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00064908946 |\n",
      "|    clip_fraction        | 0.00483       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.256        |\n",
      "|    explained_variance   | 0.695         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 153           |\n",
      "|    n_updates            | 2600          |\n",
      "|    policy_gradient_loss | -0.000478     |\n",
      "|    value_loss           | 580           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 4.39e+03     |\n",
      "|    ep_rew_mean          | 1.52e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 262          |\n",
      "|    time_elapsed         | 9306         |\n",
      "|    total_timesteps      | 536576       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0019984646 |\n",
      "|    clip_fraction        | 0.00723      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.439       |\n",
      "|    explained_variance   | 0.85         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 155          |\n",
      "|    n_updates            | 2610         |\n",
      "|    policy_gradient_loss | -0.000608    |\n",
      "|    value_loss           | 278          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 3.77e+03     |\n",
      "|    ep_rew_mean          | 1.55e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 263          |\n",
      "|    time_elapsed         | 9342         |\n",
      "|    total_timesteps      | 538624       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006443559 |\n",
      "|    clip_fraction        | 0.00439      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.216       |\n",
      "|    explained_variance   | 0.737        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 202          |\n",
      "|    n_updates            | 2620         |\n",
      "|    policy_gradient_loss | -0.00115     |\n",
      "|    value_loss           | 637          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 3.47e+03     |\n",
      "|    ep_rew_mean          | 1.57e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 264          |\n",
      "|    time_elapsed         | 9377         |\n",
      "|    total_timesteps      | 540672       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005528272 |\n",
      "|    clip_fraction        | 0.0123       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.243       |\n",
      "|    explained_variance   | 0.782        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 531          |\n",
      "|    n_updates            | 2630         |\n",
      "|    policy_gradient_loss | -0.00114     |\n",
      "|    value_loss           | 469          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 3.09e+03     |\n",
      "|    ep_rew_mean          | 1.59e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 265          |\n",
      "|    time_elapsed         | 9413         |\n",
      "|    total_timesteps      | 542720       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0009546116 |\n",
      "|    clip_fraction        | 0.0128       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.317       |\n",
      "|    explained_variance   | 0.815        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 76           |\n",
      "|    n_updates            | 2640         |\n",
      "|    policy_gradient_loss | -0.00159     |\n",
      "|    value_loss           | 589          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 2.52e+03      |\n",
      "|    ep_rew_mean          | 1.59e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 266           |\n",
      "|    time_elapsed         | 9449          |\n",
      "|    total_timesteps      | 544768        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00028971612 |\n",
      "|    clip_fraction        | 0.000879      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.296        |\n",
      "|    explained_variance   | 0.784         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 244           |\n",
      "|    n_updates            | 2650          |\n",
      "|    policy_gradient_loss | -0.000713     |\n",
      "|    value_loss           | 491           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 2.44e+03     |\n",
      "|    ep_rew_mean          | 1.6e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 267          |\n",
      "|    time_elapsed         | 9485         |\n",
      "|    total_timesteps      | 546816       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006221824 |\n",
      "|    clip_fraction        | 0.00586      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.288       |\n",
      "|    explained_variance   | 0.803        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 285          |\n",
      "|    n_updates            | 2660         |\n",
      "|    policy_gradient_loss | -0.000723    |\n",
      "|    value_loss           | 554          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 2.21e+03      |\n",
      "|    ep_rew_mean          | 1.63e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 268           |\n",
      "|    time_elapsed         | 9520          |\n",
      "|    total_timesteps      | 548864        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 9.1470225e-05 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.302        |\n",
      "|    explained_variance   | 0.707         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 187           |\n",
      "|    n_updates            | 2670          |\n",
      "|    policy_gradient_loss | -2.18e-05     |\n",
      "|    value_loss           | 588           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.99e+03      |\n",
      "|    ep_rew_mean          | 1.66e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 269           |\n",
      "|    time_elapsed         | 9556          |\n",
      "|    total_timesteps      | 550912        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00018419017 |\n",
      "|    clip_fraction        | 0.00376       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.32         |\n",
      "|    explained_variance   | 0.639         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 318           |\n",
      "|    n_updates            | 2680          |\n",
      "|    policy_gradient_loss | -0.000619     |\n",
      "|    value_loss           | 528           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.92e+03     |\n",
      "|    ep_rew_mean          | 1.68e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 270          |\n",
      "|    time_elapsed         | 9591         |\n",
      "|    total_timesteps      | 552960       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0013346374 |\n",
      "|    clip_fraction        | 0.0115       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.33        |\n",
      "|    explained_variance   | 0.673        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 91.4         |\n",
      "|    n_updates            | 2690         |\n",
      "|    policy_gradient_loss | -0.00123     |\n",
      "|    value_loss           | 601          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.51e+03     |\n",
      "|    ep_rew_mean          | 1.7e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 271          |\n",
      "|    time_elapsed         | 9627         |\n",
      "|    total_timesteps      | 555008       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0016493897 |\n",
      "|    clip_fraction        | 0.0136       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.332       |\n",
      "|    explained_variance   | 0.794        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 399          |\n",
      "|    n_updates            | 2700         |\n",
      "|    policy_gradient_loss | -0.00072     |\n",
      "|    value_loss           | 375          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.38e+03      |\n",
      "|    ep_rew_mean          | 1.7e+03       |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 272           |\n",
      "|    time_elapsed         | 9662          |\n",
      "|    total_timesteps      | 557056        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00033601874 |\n",
      "|    clip_fraction        | 9.77e-05      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.288        |\n",
      "|    explained_variance   | 0.635         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 290           |\n",
      "|    n_updates            | 2710          |\n",
      "|    policy_gradient_loss | -6.44e-05     |\n",
      "|    value_loss           | 775           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.31e+03      |\n",
      "|    ep_rew_mean          | 1.72e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 273           |\n",
      "|    time_elapsed         | 9697          |\n",
      "|    total_timesteps      | 559104        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00037986453 |\n",
      "|    clip_fraction        | 0.00337       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.341        |\n",
      "|    explained_variance   | 0.851         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 126           |\n",
      "|    n_updates            | 2720          |\n",
      "|    policy_gradient_loss | -0.000631     |\n",
      "|    value_loss           | 396           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.23e+03      |\n",
      "|    ep_rew_mean          | 1.72e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 274           |\n",
      "|    time_elapsed         | 9734          |\n",
      "|    total_timesteps      | 561152        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00022007409 |\n",
      "|    clip_fraction        | 0.000391      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.325        |\n",
      "|    explained_variance   | 0.754         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 215           |\n",
      "|    n_updates            | 2730          |\n",
      "|    policy_gradient_loss | -6.32e-05     |\n",
      "|    value_loss           | 503           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.23e+03      |\n",
      "|    ep_rew_mean          | 1.73e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 275           |\n",
      "|    time_elapsed         | 9768          |\n",
      "|    total_timesteps      | 563200        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00044577723 |\n",
      "|    clip_fraction        | 0.000195      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.27         |\n",
      "|    explained_variance   | 0.783         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 372           |\n",
      "|    n_updates            | 2740          |\n",
      "|    policy_gradient_loss | -0.000709     |\n",
      "|    value_loss           | 466           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.7e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 276          |\n",
      "|    time_elapsed         | 9803         |\n",
      "|    total_timesteps      | 565248       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011404883 |\n",
      "|    clip_fraction        | 0.00435      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.264       |\n",
      "|    explained_variance   | 0.746        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 590          |\n",
      "|    n_updates            | 2750         |\n",
      "|    policy_gradient_loss | -0.000952    |\n",
      "|    value_loss           | 511          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.09e+03     |\n",
      "|    ep_rew_mean          | 1.69e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 277          |\n",
      "|    time_elapsed         | 9838         |\n",
      "|    total_timesteps      | 567296       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008372052 |\n",
      "|    clip_fraction        | 0.0116       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.199       |\n",
      "|    explained_variance   | 0.545        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 151          |\n",
      "|    n_updates            | 2760         |\n",
      "|    policy_gradient_loss | -0.00191     |\n",
      "|    value_loss           | 812          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.09e+03      |\n",
      "|    ep_rew_mean          | 1.69e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 278           |\n",
      "|    time_elapsed         | 9873          |\n",
      "|    total_timesteps      | 569344        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00087715354 |\n",
      "|    clip_fraction        | 0.00425       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.313        |\n",
      "|    explained_variance   | 0.861         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 258           |\n",
      "|    n_updates            | 2770          |\n",
      "|    policy_gradient_loss | -0.000576     |\n",
      "|    value_loss           | 349           |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.08e+03    |\n",
      "|    ep_rew_mean          | 1.7e+03     |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 279         |\n",
      "|    time_elapsed         | 9908        |\n",
      "|    total_timesteps      | 571392      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.000363363 |\n",
      "|    clip_fraction        | 0           |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.299      |\n",
      "|    explained_variance   | 0.579       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 122         |\n",
      "|    n_updates            | 2780        |\n",
      "|    policy_gradient_loss | -0.000296   |\n",
      "|    value_loss           | 726         |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.09e+03      |\n",
      "|    ep_rew_mean          | 1.7e+03       |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 280           |\n",
      "|    time_elapsed         | 9943          |\n",
      "|    total_timesteps      | 573440        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00080777414 |\n",
      "|    clip_fraction        | 0.00244       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.255        |\n",
      "|    explained_variance   | 0.724         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 586           |\n",
      "|    n_updates            | 2790          |\n",
      "|    policy_gradient_loss | -0.000609     |\n",
      "|    value_loss           | 446           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.09e+03      |\n",
      "|    ep_rew_mean          | 1.7e+03       |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 281           |\n",
      "|    time_elapsed         | 9979          |\n",
      "|    total_timesteps      | 575488        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00046614461 |\n",
      "|    clip_fraction        | 0.00107       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.266        |\n",
      "|    explained_variance   | 0.806         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 296           |\n",
      "|    n_updates            | 2800          |\n",
      "|    policy_gradient_loss | -2.9e-05      |\n",
      "|    value_loss           | 487           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.07e+03     |\n",
      "|    ep_rew_mean          | 1.68e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 282          |\n",
      "|    time_elapsed         | 10014        |\n",
      "|    total_timesteps      | 577536       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007710743 |\n",
      "|    clip_fraction        | 0.00337      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.283       |\n",
      "|    explained_variance   | 0.844        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 226          |\n",
      "|    n_updates            | 2810         |\n",
      "|    policy_gradient_loss | -0.00122     |\n",
      "|    value_loss           | 388          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.06e+03      |\n",
      "|    ep_rew_mean          | 1.67e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 283           |\n",
      "|    time_elapsed         | 10049         |\n",
      "|    total_timesteps      | 579584        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00025100127 |\n",
      "|    clip_fraction        | 0.000146      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.258        |\n",
      "|    explained_variance   | 0.716         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 188           |\n",
      "|    n_updates            | 2820          |\n",
      "|    policy_gradient_loss | -0.000231     |\n",
      "|    value_loss           | 683           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.06e+03     |\n",
      "|    ep_rew_mean          | 1.68e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 284          |\n",
      "|    time_elapsed         | 10086        |\n",
      "|    total_timesteps      | 581632       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0009210623 |\n",
      "|    clip_fraction        | 0.00249      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.235       |\n",
      "|    explained_variance   | 0.806        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 208          |\n",
      "|    n_updates            | 2830         |\n",
      "|    policy_gradient_loss | -0.000821    |\n",
      "|    value_loss           | 423          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.06e+03      |\n",
      "|    ep_rew_mean          | 1.69e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 285           |\n",
      "|    time_elapsed         | 10120         |\n",
      "|    total_timesteps      | 583680        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00027976994 |\n",
      "|    clip_fraction        | 0.000977      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.227        |\n",
      "|    explained_variance   | 0.656         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 193           |\n",
      "|    n_updates            | 2840          |\n",
      "|    policy_gradient_loss | -5.5e-05      |\n",
      "|    value_loss           | 551           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.05e+03      |\n",
      "|    ep_rew_mean          | 1.67e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 286           |\n",
      "|    time_elapsed         | 10154         |\n",
      "|    total_timesteps      | 585728        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00029677016 |\n",
      "|    clip_fraction        | 0.00112       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.224        |\n",
      "|    explained_variance   | 0.758         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 81.2          |\n",
      "|    n_updates            | 2850          |\n",
      "|    policy_gradient_loss | -0.000461     |\n",
      "|    value_loss           | 376           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.05e+03     |\n",
      "|    ep_rew_mean          | 1.69e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 287          |\n",
      "|    time_elapsed         | 10190        |\n",
      "|    total_timesteps      | 587776       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014049985 |\n",
      "|    clip_fraction        | 0.00977      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.22        |\n",
      "|    explained_variance   | 0.816        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 440          |\n",
      "|    n_updates            | 2860         |\n",
      "|    policy_gradient_loss | -0.00127     |\n",
      "|    value_loss           | 489          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.05e+03     |\n",
      "|    ep_rew_mean          | 1.69e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 288          |\n",
      "|    time_elapsed         | 10225        |\n",
      "|    total_timesteps      | 589824       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010117774 |\n",
      "|    clip_fraction        | 0.0115       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.189       |\n",
      "|    explained_variance   | 0.805        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 111          |\n",
      "|    n_updates            | 2870         |\n",
      "|    policy_gradient_loss | -0.00128     |\n",
      "|    value_loss           | 348          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.06e+03     |\n",
      "|    ep_rew_mean          | 1.7e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 289          |\n",
      "|    time_elapsed         | 10262        |\n",
      "|    total_timesteps      | 591872       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0001540629 |\n",
      "|    clip_fraction        | 4.88e-05     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.244       |\n",
      "|    explained_variance   | 0.855        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 151          |\n",
      "|    n_updates            | 2880         |\n",
      "|    policy_gradient_loss | -0.000344    |\n",
      "|    value_loss           | 411          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.05e+03      |\n",
      "|    ep_rew_mean          | 1.69e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 290           |\n",
      "|    time_elapsed         | 10297         |\n",
      "|    total_timesteps      | 593920        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00030810665 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.207        |\n",
      "|    explained_variance   | 0.72          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 389           |\n",
      "|    n_updates            | 2890          |\n",
      "|    policy_gradient_loss | 8.22e-05      |\n",
      "|    value_loss           | 515           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.05e+03      |\n",
      "|    ep_rew_mean          | 1.69e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 291           |\n",
      "|    time_elapsed         | 10333         |\n",
      "|    total_timesteps      | 595968        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00010392172 |\n",
      "|    clip_fraction        | 9.77e-05      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.144        |\n",
      "|    explained_variance   | 0.677         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 214           |\n",
      "|    n_updates            | 2900          |\n",
      "|    policy_gradient_loss | -0.000233     |\n",
      "|    value_loss           | 538           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.04e+03     |\n",
      "|    ep_rew_mean          | 1.69e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 292          |\n",
      "|    time_elapsed         | 10369        |\n",
      "|    total_timesteps      | 598016       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005484726 |\n",
      "|    clip_fraction        | 0.00249      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.167       |\n",
      "|    explained_variance   | 0.686        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 181          |\n",
      "|    n_updates            | 2910         |\n",
      "|    policy_gradient_loss | -0.000926    |\n",
      "|    value_loss           | 488          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.04e+03      |\n",
      "|    ep_rew_mean          | 1.69e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 293           |\n",
      "|    time_elapsed         | 10406         |\n",
      "|    total_timesteps      | 600064        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00010774241 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.188        |\n",
      "|    explained_variance   | 0.661         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 190           |\n",
      "|    n_updates            | 2920          |\n",
      "|    policy_gradient_loss | 0.000276      |\n",
      "|    value_loss           | 500           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.01e+03     |\n",
      "|    ep_rew_mean          | 1.67e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 294          |\n",
      "|    time_elapsed         | 10442        |\n",
      "|    total_timesteps      | 602112       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 7.073066e-05 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.189       |\n",
      "|    explained_variance   | 0.756        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 129          |\n",
      "|    n_updates            | 2930         |\n",
      "|    policy_gradient_loss | 0.000226     |\n",
      "|    value_loss           | 536          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.01e+03      |\n",
      "|    ep_rew_mean          | 1.66e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 295           |\n",
      "|    time_elapsed         | 10477         |\n",
      "|    total_timesteps      | 604160        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00038762958 |\n",
      "|    clip_fraction        | 0.00083       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.175        |\n",
      "|    explained_variance   | 0.717         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 107           |\n",
      "|    n_updates            | 2940          |\n",
      "|    policy_gradient_loss | -0.000443     |\n",
      "|    value_loss           | 461           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.01e+03      |\n",
      "|    ep_rew_mean          | 1.67e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 296           |\n",
      "|    time_elapsed         | 10513         |\n",
      "|    total_timesteps      | 606208        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00011023323 |\n",
      "|    clip_fraction        | 0.000684      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.214        |\n",
      "|    explained_variance   | 0.712         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 182           |\n",
      "|    n_updates            | 2950          |\n",
      "|    policy_gradient_loss | -0.000204     |\n",
      "|    value_loss           | 626           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 992          |\n",
      "|    ep_rew_mean          | 1.66e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 297          |\n",
      "|    time_elapsed         | 10548        |\n",
      "|    total_timesteps      | 608256       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0002710791 |\n",
      "|    clip_fraction        | 0.000195     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.232       |\n",
      "|    explained_variance   | 0.804        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 310          |\n",
      "|    n_updates            | 2960         |\n",
      "|    policy_gradient_loss | -0.000185    |\n",
      "|    value_loss           | 318          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 992           |\n",
      "|    ep_rew_mean          | 1.66e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 298           |\n",
      "|    time_elapsed         | 10583         |\n",
      "|    total_timesteps      | 610304        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00018317302 |\n",
      "|    clip_fraction        | 9.77e-05      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.219        |\n",
      "|    explained_variance   | 0.756         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 200           |\n",
      "|    n_updates            | 2970          |\n",
      "|    policy_gradient_loss | -0.000202     |\n",
      "|    value_loss           | 432           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 993          |\n",
      "|    ep_rew_mean          | 1.66e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 299          |\n",
      "|    time_elapsed         | 10619        |\n",
      "|    total_timesteps      | 612352       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0004844974 |\n",
      "|    clip_fraction        | 0.00239      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.248       |\n",
      "|    explained_variance   | 0.785        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 199          |\n",
      "|    n_updates            | 2980         |\n",
      "|    policy_gradient_loss | -0.000857    |\n",
      "|    value_loss           | 461          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1e+03         |\n",
      "|    ep_rew_mean          | 1.66e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 300           |\n",
      "|    time_elapsed         | 10654         |\n",
      "|    total_timesteps      | 614400        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00032667257 |\n",
      "|    clip_fraction        | 9.77e-05      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.251        |\n",
      "|    explained_variance   | 0.76          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 102           |\n",
      "|    n_updates            | 2990          |\n",
      "|    policy_gradient_loss | -4.66e-05     |\n",
      "|    value_loss           | 551           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 982           |\n",
      "|    ep_rew_mean          | 1.67e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 301           |\n",
      "|    time_elapsed         | 10689         |\n",
      "|    total_timesteps      | 616448        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00015561437 |\n",
      "|    clip_fraction        | 4.88e-05      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.254        |\n",
      "|    explained_variance   | 0.749         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 291           |\n",
      "|    n_updates            | 3000          |\n",
      "|    policy_gradient_loss | -0.000592     |\n",
      "|    value_loss           | 537           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 971           |\n",
      "|    ep_rew_mean          | 1.66e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 302           |\n",
      "|    time_elapsed         | 10725         |\n",
      "|    total_timesteps      | 618496        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00010417384 |\n",
      "|    clip_fraction        | 0.000391      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.198        |\n",
      "|    explained_variance   | 0.686         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 136           |\n",
      "|    n_updates            | 3010          |\n",
      "|    policy_gradient_loss | -0.00016      |\n",
      "|    value_loss           | 506           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 967          |\n",
      "|    ep_rew_mean          | 1.66e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 303          |\n",
      "|    time_elapsed         | 10761        |\n",
      "|    total_timesteps      | 620544       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0004372687 |\n",
      "|    clip_fraction        | 0.000635     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.213       |\n",
      "|    explained_variance   | 0.815        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 262          |\n",
      "|    n_updates            | 3020         |\n",
      "|    policy_gradient_loss | -0.000124    |\n",
      "|    value_loss           | 454          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 961           |\n",
      "|    ep_rew_mean          | 1.65e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 304           |\n",
      "|    time_elapsed         | 10794         |\n",
      "|    total_timesteps      | 622592        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00043646074 |\n",
      "|    clip_fraction        | 0.00366       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.176        |\n",
      "|    explained_variance   | 0.697         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 424           |\n",
      "|    n_updates            | 3030          |\n",
      "|    policy_gradient_loss | -0.000318     |\n",
      "|    value_loss           | 581           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 943          |\n",
      "|    ep_rew_mean          | 1.63e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 305          |\n",
      "|    time_elapsed         | 10830        |\n",
      "|    total_timesteps      | 624640       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0001666245 |\n",
      "|    clip_fraction        | 4.88e-05     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.177       |\n",
      "|    explained_variance   | 0.69         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 137          |\n",
      "|    n_updates            | 3040         |\n",
      "|    policy_gradient_loss | -0.000173    |\n",
      "|    value_loss           | 513          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 946           |\n",
      "|    ep_rew_mean          | 1.64e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 306           |\n",
      "|    time_elapsed         | 10865         |\n",
      "|    total_timesteps      | 626688        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00041475458 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.176        |\n",
      "|    explained_variance   | 0.72          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 303           |\n",
      "|    n_updates            | 3050          |\n",
      "|    policy_gradient_loss | -0.000392     |\n",
      "|    value_loss           | 565           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 919           |\n",
      "|    ep_rew_mean          | 1.61e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 307           |\n",
      "|    time_elapsed         | 10900         |\n",
      "|    total_timesteps      | 628736        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 8.1287726e-05 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.2          |\n",
      "|    explained_variance   | 0.744         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 146           |\n",
      "|    n_updates            | 3060          |\n",
      "|    policy_gradient_loss | -6.93e-05     |\n",
      "|    value_loss           | 504           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 924          |\n",
      "|    ep_rew_mean          | 1.62e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 308          |\n",
      "|    time_elapsed         | 10935        |\n",
      "|    total_timesteps      | 630784       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0003153828 |\n",
      "|    clip_fraction        | 0.000293     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.209       |\n",
      "|    explained_variance   | 0.722        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 361          |\n",
      "|    n_updates            | 3070         |\n",
      "|    policy_gradient_loss | -0.000171    |\n",
      "|    value_loss           | 651          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 923          |\n",
      "|    ep_rew_mean          | 1.62e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 309          |\n",
      "|    time_elapsed         | 10970        |\n",
      "|    total_timesteps      | 632832       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008045726 |\n",
      "|    clip_fraction        | 0.0174       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.225       |\n",
      "|    explained_variance   | 0.693        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 132          |\n",
      "|    n_updates            | 3080         |\n",
      "|    policy_gradient_loss | -0.00141     |\n",
      "|    value_loss           | 467          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 920          |\n",
      "|    ep_rew_mean          | 1.62e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 310          |\n",
      "|    time_elapsed         | 11005        |\n",
      "|    total_timesteps      | 634880       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0009806401 |\n",
      "|    clip_fraction        | 0.00977      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.236       |\n",
      "|    explained_variance   | 0.738        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 481          |\n",
      "|    n_updates            | 3090         |\n",
      "|    policy_gradient_loss | -0.000945    |\n",
      "|    value_loss           | 509          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 927          |\n",
      "|    ep_rew_mean          | 1.63e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 311          |\n",
      "|    time_elapsed         | 11040        |\n",
      "|    total_timesteps      | 636928       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0001886778 |\n",
      "|    clip_fraction        | 0.000439     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.208       |\n",
      "|    explained_variance   | 0.739        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 173          |\n",
      "|    n_updates            | 3100         |\n",
      "|    policy_gradient_loss | -0.000278    |\n",
      "|    value_loss           | 492          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 929           |\n",
      "|    ep_rew_mean          | 1.64e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 312           |\n",
      "|    time_elapsed         | 11076         |\n",
      "|    total_timesteps      | 638976        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00040292874 |\n",
      "|    clip_fraction        | 0.00186       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.19         |\n",
      "|    explained_variance   | 0.605         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 344           |\n",
      "|    n_updates            | 3110          |\n",
      "|    policy_gradient_loss | -0.000422     |\n",
      "|    value_loss           | 542           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 923          |\n",
      "|    ep_rew_mean          | 1.61e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 313          |\n",
      "|    time_elapsed         | 11112        |\n",
      "|    total_timesteps      | 641024       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0013283271 |\n",
      "|    clip_fraction        | 0.0117       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.208       |\n",
      "|    explained_variance   | 0.716        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 244          |\n",
      "|    n_updates            | 3120         |\n",
      "|    policy_gradient_loss | -0.000847    |\n",
      "|    value_loss           | 431          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 911           |\n",
      "|    ep_rew_mean          | 1.6e+03       |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 314           |\n",
      "|    time_elapsed         | 11147         |\n",
      "|    total_timesteps      | 643072        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00047759677 |\n",
      "|    clip_fraction        | 0.0103        |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.22         |\n",
      "|    explained_variance   | 0.803         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 345           |\n",
      "|    n_updates            | 3130          |\n",
      "|    policy_gradient_loss | -0.000961     |\n",
      "|    value_loss           | 457           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 908           |\n",
      "|    ep_rew_mean          | 1.59e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 315           |\n",
      "|    time_elapsed         | 11182         |\n",
      "|    total_timesteps      | 645120        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00060250185 |\n",
      "|    clip_fraction        | 0.00264       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.235        |\n",
      "|    explained_variance   | 0.595         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 524           |\n",
      "|    n_updates            | 3140          |\n",
      "|    policy_gradient_loss | -0.000365     |\n",
      "|    value_loss           | 674           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 916           |\n",
      "|    ep_rew_mean          | 1.6e+03       |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 316           |\n",
      "|    time_elapsed         | 11218         |\n",
      "|    total_timesteps      | 647168        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00060700806 |\n",
      "|    clip_fraction        | 0.00747       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.251        |\n",
      "|    explained_variance   | 0.758         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 139           |\n",
      "|    n_updates            | 3150          |\n",
      "|    policy_gradient_loss | -0.000762     |\n",
      "|    value_loss           | 481           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 921           |\n",
      "|    ep_rew_mean          | 1.61e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 317           |\n",
      "|    time_elapsed         | 11253         |\n",
      "|    total_timesteps      | 649216        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00053788326 |\n",
      "|    clip_fraction        | 0.00117       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.287        |\n",
      "|    explained_variance   | 0.756         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 273           |\n",
      "|    n_updates            | 3160          |\n",
      "|    policy_gradient_loss | -8.25e-05     |\n",
      "|    value_loss           | 514           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 915           |\n",
      "|    ep_rew_mean          | 1.6e+03       |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 318           |\n",
      "|    time_elapsed         | 11288         |\n",
      "|    total_timesteps      | 651264        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00047744648 |\n",
      "|    clip_fraction        | 0.000293      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.252        |\n",
      "|    explained_variance   | 0.799         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 205           |\n",
      "|    n_updates            | 3170          |\n",
      "|    policy_gradient_loss | -0.000223     |\n",
      "|    value_loss           | 388           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 901           |\n",
      "|    ep_rew_mean          | 1.57e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 319           |\n",
      "|    time_elapsed         | 11323         |\n",
      "|    total_timesteps      | 653312        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00013965965 |\n",
      "|    clip_fraction        | 0.000195      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.231        |\n",
      "|    explained_variance   | 0.808         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 112           |\n",
      "|    n_updates            | 3180          |\n",
      "|    policy_gradient_loss | -0.000424     |\n",
      "|    value_loss           | 401           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 911           |\n",
      "|    ep_rew_mean          | 1.59e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 320           |\n",
      "|    time_elapsed         | 11359         |\n",
      "|    total_timesteps      | 655360        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00056436495 |\n",
      "|    clip_fraction        | 0.0119        |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.232        |\n",
      "|    explained_variance   | 0.732         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 396           |\n",
      "|    n_updates            | 3190          |\n",
      "|    policy_gradient_loss | -0.000734     |\n",
      "|    value_loss           | 644           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 917          |\n",
      "|    ep_rew_mean          | 1.59e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 321          |\n",
      "|    time_elapsed         | 11395        |\n",
      "|    total_timesteps      | 657408       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008781799 |\n",
      "|    clip_fraction        | 0.00649      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.259       |\n",
      "|    explained_variance   | 0.787        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 172          |\n",
      "|    n_updates            | 3200         |\n",
      "|    policy_gradient_loss | -0.000697    |\n",
      "|    value_loss           | 447          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 932           |\n",
      "|    ep_rew_mean          | 1.61e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 322           |\n",
      "|    time_elapsed         | 11430         |\n",
      "|    total_timesteps      | 659456        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00093753485 |\n",
      "|    clip_fraction        | 0.0131        |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.292        |\n",
      "|    explained_variance   | 0.717         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 100           |\n",
      "|    n_updates            | 3210          |\n",
      "|    policy_gradient_loss | -0.00124      |\n",
      "|    value_loss           | 506           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 932          |\n",
      "|    ep_rew_mean          | 1.61e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 323          |\n",
      "|    time_elapsed         | 11467        |\n",
      "|    total_timesteps      | 661504       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007506433 |\n",
      "|    clip_fraction        | 0.000732     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.425       |\n",
      "|    explained_variance   | 0.881        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 312          |\n",
      "|    n_updates            | 3220         |\n",
      "|    policy_gradient_loss | -0.000314    |\n",
      "|    value_loss           | 384          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 957          |\n",
      "|    ep_rew_mean          | 1.62e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 324          |\n",
      "|    time_elapsed         | 11500        |\n",
      "|    total_timesteps      | 663552       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0020895237 |\n",
      "|    clip_fraction        | 0.0135       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.441       |\n",
      "|    explained_variance   | 0.92         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 23.2         |\n",
      "|    n_updates            | 3230         |\n",
      "|    policy_gradient_loss | -0.00184     |\n",
      "|    value_loss           | 189          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 964           |\n",
      "|    ep_rew_mean          | 1.63e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 325           |\n",
      "|    time_elapsed         | 11536         |\n",
      "|    total_timesteps      | 665600        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00011923618 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.329        |\n",
      "|    explained_variance   | 0.855         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 242           |\n",
      "|    n_updates            | 3240          |\n",
      "|    policy_gradient_loss | -6.13e-05     |\n",
      "|    value_loss           | 422           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 966          |\n",
      "|    ep_rew_mean          | 1.63e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 326          |\n",
      "|    time_elapsed         | 11572        |\n",
      "|    total_timesteps      | 667648       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 9.114877e-05 |\n",
      "|    clip_fraction        | 9.77e-05     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.265       |\n",
      "|    explained_variance   | 0.774        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 363          |\n",
      "|    n_updates            | 3250         |\n",
      "|    policy_gradient_loss | -0.000241    |\n",
      "|    value_loss           | 522          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 966           |\n",
      "|    ep_rew_mean          | 1.64e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 327           |\n",
      "|    time_elapsed         | 11607         |\n",
      "|    total_timesteps      | 669696        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00025234072 |\n",
      "|    clip_fraction        | 0.00591       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.304        |\n",
      "|    explained_variance   | 0.861         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 161           |\n",
      "|    n_updates            | 3260          |\n",
      "|    policy_gradient_loss | -0.000386     |\n",
      "|    value_loss           | 399           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 968          |\n",
      "|    ep_rew_mean          | 1.64e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 328          |\n",
      "|    time_elapsed         | 11642        |\n",
      "|    total_timesteps      | 671744       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011898365 |\n",
      "|    clip_fraction        | 0.00947      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.228       |\n",
      "|    explained_variance   | 0.749        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 267          |\n",
      "|    n_updates            | 3270         |\n",
      "|    policy_gradient_loss | -0.00103     |\n",
      "|    value_loss           | 498          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 984          |\n",
      "|    ep_rew_mean          | 1.66e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 329          |\n",
      "|    time_elapsed         | 11677        |\n",
      "|    total_timesteps      | 673792       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0004798731 |\n",
      "|    clip_fraction        | 0.00444      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.263       |\n",
      "|    explained_variance   | 0.882        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 129          |\n",
      "|    n_updates            | 3280         |\n",
      "|    policy_gradient_loss | -0.000669    |\n",
      "|    value_loss           | 285          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 992           |\n",
      "|    ep_rew_mean          | 1.67e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 330           |\n",
      "|    time_elapsed         | 11712         |\n",
      "|    total_timesteps      | 675840        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00014744885 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.28         |\n",
      "|    explained_variance   | 0.809         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 488           |\n",
      "|    n_updates            | 3290          |\n",
      "|    policy_gradient_loss | 5.78e-05      |\n",
      "|    value_loss           | 602           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.01e+03     |\n",
      "|    ep_rew_mean          | 1.69e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 331          |\n",
      "|    time_elapsed         | 11748        |\n",
      "|    total_timesteps      | 677888       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0016728013 |\n",
      "|    clip_fraction        | 0.00991      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.254       |\n",
      "|    explained_variance   | 0.819        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 116          |\n",
      "|    n_updates            | 3300         |\n",
      "|    policy_gradient_loss | -0.000665    |\n",
      "|    value_loss           | 323          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.01e+03     |\n",
      "|    ep_rew_mean          | 1.69e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 332          |\n",
      "|    time_elapsed         | 11783        |\n",
      "|    total_timesteps      | 679936       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005284014 |\n",
      "|    clip_fraction        | 0.000146     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.227       |\n",
      "|    explained_variance   | 0.832        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 94.7         |\n",
      "|    n_updates            | 3310         |\n",
      "|    policy_gradient_loss | -0.000227    |\n",
      "|    value_loss           | 403          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.01e+03     |\n",
      "|    ep_rew_mean          | 1.67e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 333          |\n",
      "|    time_elapsed         | 11820        |\n",
      "|    total_timesteps      | 681984       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007416675 |\n",
      "|    clip_fraction        | 0.0019       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.243       |\n",
      "|    explained_variance   | 0.801        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 144          |\n",
      "|    n_updates            | 3320         |\n",
      "|    policy_gradient_loss | -0.000383    |\n",
      "|    value_loss           | 674          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1e+03        |\n",
      "|    ep_rew_mean          | 1.65e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 334          |\n",
      "|    time_elapsed         | 11854        |\n",
      "|    total_timesteps      | 684032       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006152511 |\n",
      "|    clip_fraction        | 0.00444      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.218       |\n",
      "|    explained_variance   | 0.875        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 137          |\n",
      "|    n_updates            | 3330         |\n",
      "|    policy_gradient_loss | -0.00056     |\n",
      "|    value_loss           | 463          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 995          |\n",
      "|    ep_rew_mean          | 1.65e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 335          |\n",
      "|    time_elapsed         | 11889        |\n",
      "|    total_timesteps      | 686080       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0009616737 |\n",
      "|    clip_fraction        | 0.018        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.237       |\n",
      "|    explained_variance   | 0.707        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 228          |\n",
      "|    n_updates            | 3340         |\n",
      "|    policy_gradient_loss | -0.00167     |\n",
      "|    value_loss           | 668          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 996          |\n",
      "|    ep_rew_mean          | 1.64e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 336          |\n",
      "|    time_elapsed         | 11924        |\n",
      "|    total_timesteps      | 688128       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010948731 |\n",
      "|    clip_fraction        | 0.0217       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.224       |\n",
      "|    explained_variance   | 0.667        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 182          |\n",
      "|    n_updates            | 3350         |\n",
      "|    policy_gradient_loss | -0.00179     |\n",
      "|    value_loss           | 569          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1e+03         |\n",
      "|    ep_rew_mean          | 1.65e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 337           |\n",
      "|    time_elapsed         | 11960         |\n",
      "|    total_timesteps      | 690176        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00029908516 |\n",
      "|    clip_fraction        | 0.00107       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.309        |\n",
      "|    explained_variance   | 0.872         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 127           |\n",
      "|    n_updates            | 3360          |\n",
      "|    policy_gradient_loss | -0.00011      |\n",
      "|    value_loss           | 304           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.01e+03      |\n",
      "|    ep_rew_mean          | 1.65e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 338           |\n",
      "|    time_elapsed         | 11996         |\n",
      "|    total_timesteps      | 692224        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00016680744 |\n",
      "|    clip_fraction        | 0.000293      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.255        |\n",
      "|    explained_variance   | 0.778         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 245           |\n",
      "|    n_updates            | 3370          |\n",
      "|    policy_gradient_loss | -0.000532     |\n",
      "|    value_loss           | 424           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.01e+03     |\n",
      "|    ep_rew_mean          | 1.67e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 339          |\n",
      "|    time_elapsed         | 12031        |\n",
      "|    total_timesteps      | 694272       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008677966 |\n",
      "|    clip_fraction        | 0.0183       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.293       |\n",
      "|    explained_variance   | 0.779        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 436          |\n",
      "|    n_updates            | 3380         |\n",
      "|    policy_gradient_loss | -0.00207     |\n",
      "|    value_loss           | 585          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.04e+03     |\n",
      "|    ep_rew_mean          | 1.69e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 340          |\n",
      "|    time_elapsed         | 12066        |\n",
      "|    total_timesteps      | 696320       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011546584 |\n",
      "|    clip_fraction        | 0.0151       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.316       |\n",
      "|    explained_variance   | 0.756        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 358          |\n",
      "|    n_updates            | 3390         |\n",
      "|    policy_gradient_loss | -0.000969    |\n",
      "|    value_loss           | 581          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.03e+03     |\n",
      "|    ep_rew_mean          | 1.68e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 341          |\n",
      "|    time_elapsed         | 12102        |\n",
      "|    total_timesteps      | 698368       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0021092386 |\n",
      "|    clip_fraction        | 0.0143       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.424       |\n",
      "|    explained_variance   | 0.938        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 85.2         |\n",
      "|    n_updates            | 3400         |\n",
      "|    policy_gradient_loss | -0.00127     |\n",
      "|    value_loss           | 247          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.03e+03      |\n",
      "|    ep_rew_mean          | 1.68e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 342           |\n",
      "|    time_elapsed         | 12139         |\n",
      "|    total_timesteps      | 700416        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00055599486 |\n",
      "|    clip_fraction        | 0.000879      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.29         |\n",
      "|    explained_variance   | 0.874         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 246           |\n",
      "|    n_updates            | 3410          |\n",
      "|    policy_gradient_loss | -0.000681     |\n",
      "|    value_loss           | 389           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.03e+03     |\n",
      "|    ep_rew_mean          | 1.68e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 343          |\n",
      "|    time_elapsed         | 12173        |\n",
      "|    total_timesteps      | 702464       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012182269 |\n",
      "|    clip_fraction        | 0.00459      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.301       |\n",
      "|    explained_variance   | 0.803        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 379          |\n",
      "|    n_updates            | 3420         |\n",
      "|    policy_gradient_loss | -0.000916    |\n",
      "|    value_loss           | 575          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.04e+03      |\n",
      "|    ep_rew_mean          | 1.69e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 344           |\n",
      "|    time_elapsed         | 12208         |\n",
      "|    total_timesteps      | 704512        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00077745225 |\n",
      "|    clip_fraction        | 0.000635      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.237        |\n",
      "|    explained_variance   | 0.806         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 189           |\n",
      "|    n_updates            | 3430          |\n",
      "|    policy_gradient_loss | -0.000149     |\n",
      "|    value_loss           | 399           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.04e+03     |\n",
      "|    ep_rew_mean          | 1.68e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 345          |\n",
      "|    time_elapsed         | 12243        |\n",
      "|    total_timesteps      | 706560       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007387592 |\n",
      "|    clip_fraction        | 0.00962      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.245       |\n",
      "|    explained_variance   | 0.794        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 88.1         |\n",
      "|    n_updates            | 3440         |\n",
      "|    policy_gradient_loss | -0.00112     |\n",
      "|    value_loss           | 642          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.05e+03      |\n",
      "|    ep_rew_mean          | 1.69e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 346           |\n",
      "|    time_elapsed         | 12278         |\n",
      "|    total_timesteps      | 708608        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00079323375 |\n",
      "|    clip_fraction        | 0.00752       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.275        |\n",
      "|    explained_variance   | 0.733         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 369           |\n",
      "|    n_updates            | 3450          |\n",
      "|    policy_gradient_loss | -0.000252     |\n",
      "|    value_loss           | 561           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.06e+03     |\n",
      "|    ep_rew_mean          | 1.71e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 347          |\n",
      "|    time_elapsed         | 12313        |\n",
      "|    total_timesteps      | 710656       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007023644 |\n",
      "|    clip_fraction        | 0.00376      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.283       |\n",
      "|    explained_variance   | 0.816        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 257          |\n",
      "|    n_updates            | 3460         |\n",
      "|    policy_gradient_loss | 7.88e-05     |\n",
      "|    value_loss           | 336          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.07e+03     |\n",
      "|    ep_rew_mean          | 1.72e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 348          |\n",
      "|    time_elapsed         | 12348        |\n",
      "|    total_timesteps      | 712704       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010503012 |\n",
      "|    clip_fraction        | 0.000244     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.344       |\n",
      "|    explained_variance   | 0.886        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 173          |\n",
      "|    n_updates            | 3470         |\n",
      "|    policy_gradient_loss | -0.000539    |\n",
      "|    value_loss           | 427          |\n",
      "------------------------------------------\n",
      "--------------------------------------------\n",
      "| rollout/                |                |\n",
      "|    ep_len_mean          | 1.06e+03       |\n",
      "|    ep_rew_mean          | 1.7e+03        |\n",
      "| time/                   |                |\n",
      "|    fps                  | 57             |\n",
      "|    iterations           | 349            |\n",
      "|    time_elapsed         | 12384          |\n",
      "|    total_timesteps      | 714752         |\n",
      "| train/                  |                |\n",
      "|    approx_kl            | 0.000103157334 |\n",
      "|    clip_fraction        | 0              |\n",
      "|    clip_range           | 0.2            |\n",
      "|    entropy_loss         | -0.264         |\n",
      "|    explained_variance   | 0.757          |\n",
      "|    learning_rate        | 1e-06          |\n",
      "|    loss                 | 122            |\n",
      "|    n_updates            | 3480           |\n",
      "|    policy_gradient_loss | 7.32e-05       |\n",
      "|    value_loss           | 574            |\n",
      "--------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.06e+03    |\n",
      "|    ep_rew_mean          | 1.69e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 350         |\n",
      "|    time_elapsed         | 12419       |\n",
      "|    total_timesteps      | 716800      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.000948646 |\n",
      "|    clip_fraction        | 0.00522     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.24       |\n",
      "|    explained_variance   | 0.836       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 382         |\n",
      "|    n_updates            | 3490        |\n",
      "|    policy_gradient_loss | -0.000518   |\n",
      "|    value_loss           | 469         |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.05e+03      |\n",
      "|    ep_rew_mean          | 1.69e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 351           |\n",
      "|    time_elapsed         | 12454         |\n",
      "|    total_timesteps      | 718848        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00059082336 |\n",
      "|    clip_fraction        | 0.00859       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.234        |\n",
      "|    explained_variance   | 0.807         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 157           |\n",
      "|    n_updates            | 3500          |\n",
      "|    policy_gradient_loss | -0.00121      |\n",
      "|    value_loss           | 507           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.05e+03      |\n",
      "|    ep_rew_mean          | 1.67e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 352           |\n",
      "|    time_elapsed         | 12490         |\n",
      "|    total_timesteps      | 720896        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00017626034 |\n",
      "|    clip_fraction        | 0.000879      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.27         |\n",
      "|    explained_variance   | 0.813         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 449           |\n",
      "|    n_updates            | 3510          |\n",
      "|    policy_gradient_loss | -0.000327     |\n",
      "|    value_loss           | 581           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.04e+03     |\n",
      "|    ep_rew_mean          | 1.67e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 353          |\n",
      "|    time_elapsed         | 12525        |\n",
      "|    total_timesteps      | 722944       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011628242 |\n",
      "|    clip_fraction        | 0.0241       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.272       |\n",
      "|    explained_variance   | 0.654        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 308          |\n",
      "|    n_updates            | 3520         |\n",
      "|    policy_gradient_loss | -0.00169     |\n",
      "|    value_loss           | 525          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.05e+03      |\n",
      "|    ep_rew_mean          | 1.69e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 354           |\n",
      "|    time_elapsed         | 12561         |\n",
      "|    total_timesteps      | 724992        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00012685102 |\n",
      "|    clip_fraction        | 0.000293      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.295        |\n",
      "|    explained_variance   | 0.788         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 331           |\n",
      "|    n_updates            | 3530          |\n",
      "|    policy_gradient_loss | -7.46e-05     |\n",
      "|    value_loss           | 489           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.06e+03     |\n",
      "|    ep_rew_mean          | 1.7e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 355          |\n",
      "|    time_elapsed         | 12596        |\n",
      "|    total_timesteps      | 727040       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0013820224 |\n",
      "|    clip_fraction        | 0.0172       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.301       |\n",
      "|    explained_variance   | 0.587        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 292          |\n",
      "|    n_updates            | 3540         |\n",
      "|    policy_gradient_loss | -0.000978    |\n",
      "|    value_loss           | 564          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.06e+03     |\n",
      "|    ep_rew_mean          | 1.71e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 356          |\n",
      "|    time_elapsed         | 12632        |\n",
      "|    total_timesteps      | 729088       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012461513 |\n",
      "|    clip_fraction        | 0.00449      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.318       |\n",
      "|    explained_variance   | 0.765        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 241          |\n",
      "|    n_updates            | 3550         |\n",
      "|    policy_gradient_loss | -0.000533    |\n",
      "|    value_loss           | 441          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.07e+03     |\n",
      "|    ep_rew_mean          | 1.74e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 357          |\n",
      "|    time_elapsed         | 12668        |\n",
      "|    total_timesteps      | 731136       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005984962 |\n",
      "|    clip_fraction        | 0.00508      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.281       |\n",
      "|    explained_variance   | 0.691        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 156          |\n",
      "|    n_updates            | 3560         |\n",
      "|    policy_gradient_loss | -0.000463    |\n",
      "|    value_loss           | 497          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.09e+03     |\n",
      "|    ep_rew_mean          | 1.75e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 358          |\n",
      "|    time_elapsed         | 12703        |\n",
      "|    total_timesteps      | 733184       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0015309395 |\n",
      "|    clip_fraction        | 0.00366      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.324       |\n",
      "|    explained_variance   | 0.787        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 239          |\n",
      "|    n_updates            | 3570         |\n",
      "|    policy_gradient_loss | -0.000631    |\n",
      "|    value_loss           | 465          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.1e+03       |\n",
      "|    ep_rew_mean          | 1.77e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 359           |\n",
      "|    time_elapsed         | 12738         |\n",
      "|    total_timesteps      | 735232        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00020228545 |\n",
      "|    clip_fraction        | 0.000732      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.279        |\n",
      "|    explained_variance   | 0.53          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 199           |\n",
      "|    n_updates            | 3580          |\n",
      "|    policy_gradient_loss | -0.000241     |\n",
      "|    value_loss           | 664           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.09e+03      |\n",
      "|    ep_rew_mean          | 1.76e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 360           |\n",
      "|    time_elapsed         | 12774         |\n",
      "|    total_timesteps      | 737280        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00018577618 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.296        |\n",
      "|    explained_variance   | 0.762         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 194           |\n",
      "|    n_updates            | 3590          |\n",
      "|    policy_gradient_loss | -0.000148     |\n",
      "|    value_loss           | 488           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.1e+03       |\n",
      "|    ep_rew_mean          | 1.77e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 361           |\n",
      "|    time_elapsed         | 12809         |\n",
      "|    total_timesteps      | 739328        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00083994947 |\n",
      "|    clip_fraction        | 0.00728       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.308        |\n",
      "|    explained_variance   | 0.864         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 60.9          |\n",
      "|    n_updates            | 3600          |\n",
      "|    policy_gradient_loss | -0.0012       |\n",
      "|    value_loss           | 357           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.76e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 362          |\n",
      "|    time_elapsed         | 12846        |\n",
      "|    total_timesteps      | 741376       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0002929036 |\n",
      "|    clip_fraction        | 0.000342     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.311       |\n",
      "|    explained_variance   | 0.87         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 209          |\n",
      "|    n_updates            | 3610         |\n",
      "|    policy_gradient_loss | -0.000183    |\n",
      "|    value_loss           | 472          |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 1.11e+03   |\n",
      "|    ep_rew_mean          | 1.76e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 57         |\n",
      "|    iterations           | 363        |\n",
      "|    time_elapsed         | 12880      |\n",
      "|    total_timesteps      | 743424     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.00052912 |\n",
      "|    clip_fraction        | 0.000879   |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -0.309     |\n",
      "|    explained_variance   | 0.896      |\n",
      "|    learning_rate        | 1e-06      |\n",
      "|    loss                 | 88.7       |\n",
      "|    n_updates            | 3620       |\n",
      "|    policy_gradient_loss | -0.000297  |\n",
      "|    value_loss           | 302        |\n",
      "----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.77e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 364           |\n",
      "|    time_elapsed         | 12916         |\n",
      "|    total_timesteps      | 745472        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00019866676 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.231        |\n",
      "|    explained_variance   | 0.672         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 108           |\n",
      "|    n_updates            | 3630          |\n",
      "|    policy_gradient_loss | -0.000242     |\n",
      "|    value_loss           | 654           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.77e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 365          |\n",
      "|    time_elapsed         | 12952        |\n",
      "|    total_timesteps      | 747520       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008412822 |\n",
      "|    clip_fraction        | 0.00615      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.263       |\n",
      "|    explained_variance   | 0.774        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 129          |\n",
      "|    n_updates            | 3640         |\n",
      "|    policy_gradient_loss | -0.000318    |\n",
      "|    value_loss           | 379          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.79e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 366          |\n",
      "|    time_elapsed         | 12987        |\n",
      "|    total_timesteps      | 749568       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0015723081 |\n",
      "|    clip_fraction        | 0.0163       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.255       |\n",
      "|    explained_variance   | 0.799        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 339          |\n",
      "|    n_updates            | 3650         |\n",
      "|    policy_gradient_loss | -0.00126     |\n",
      "|    value_loss           | 472          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.12e+03      |\n",
      "|    ep_rew_mean          | 1.79e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 367           |\n",
      "|    time_elapsed         | 13023         |\n",
      "|    total_timesteps      | 751616        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00016887995 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.231        |\n",
      "|    explained_variance   | 0.702         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 230           |\n",
      "|    n_updates            | 3660          |\n",
      "|    policy_gradient_loss | -5.35e-05     |\n",
      "|    value_loss           | 528           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.8e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 368          |\n",
      "|    time_elapsed         | 13059        |\n",
      "|    total_timesteps      | 753664       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012897428 |\n",
      "|    clip_fraction        | 0.0203       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.277       |\n",
      "|    explained_variance   | 0.707        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 144          |\n",
      "|    n_updates            | 3670         |\n",
      "|    policy_gradient_loss | -0.00139     |\n",
      "|    value_loss           | 539          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.79e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 369          |\n",
      "|    time_elapsed         | 13094        |\n",
      "|    total_timesteps      | 755712       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012443591 |\n",
      "|    clip_fraction        | 0.0195       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.244       |\n",
      "|    explained_variance   | 0.683        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 115          |\n",
      "|    n_updates            | 3680         |\n",
      "|    policy_gradient_loss | -0.00112     |\n",
      "|    value_loss           | 545          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.8e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 370          |\n",
      "|    time_elapsed         | 13129        |\n",
      "|    total_timesteps      | 757760       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006275409 |\n",
      "|    clip_fraction        | 0.00518      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.235       |\n",
      "|    explained_variance   | 0.681        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 253          |\n",
      "|    n_updates            | 3690         |\n",
      "|    policy_gradient_loss | -0.000495    |\n",
      "|    value_loss           | 534          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.79e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 371          |\n",
      "|    time_elapsed         | 13165        |\n",
      "|    total_timesteps      | 759808       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005112451 |\n",
      "|    clip_fraction        | 0.000537     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.268       |\n",
      "|    explained_variance   | 0.801        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 118          |\n",
      "|    n_updates            | 3700         |\n",
      "|    policy_gradient_loss | -0.000228    |\n",
      "|    value_loss           | 364          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.09e+03     |\n",
      "|    ep_rew_mean          | 1.76e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 372          |\n",
      "|    time_elapsed         | 13202        |\n",
      "|    total_timesteps      | 761856       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014177145 |\n",
      "|    clip_fraction        | 0.00474      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.282       |\n",
      "|    explained_variance   | 0.858        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 466          |\n",
      "|    n_updates            | 3710         |\n",
      "|    policy_gradient_loss | -0.00104     |\n",
      "|    value_loss           | 447          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.09e+03     |\n",
      "|    ep_rew_mean          | 1.76e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 373          |\n",
      "|    time_elapsed         | 13236        |\n",
      "|    total_timesteps      | 763904       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0013400689 |\n",
      "|    clip_fraction        | 0.0261       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.254       |\n",
      "|    explained_variance   | 0.613        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 336          |\n",
      "|    n_updates            | 3720         |\n",
      "|    policy_gradient_loss | -0.002       |\n",
      "|    value_loss           | 660          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.09e+03      |\n",
      "|    ep_rew_mean          | 1.77e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 374           |\n",
      "|    time_elapsed         | 13272         |\n",
      "|    total_timesteps      | 765952        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00023772221 |\n",
      "|    clip_fraction        | 9.77e-05      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.276        |\n",
      "|    explained_variance   | 0.685         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 335           |\n",
      "|    n_updates            | 3730          |\n",
      "|    policy_gradient_loss | -0.000463     |\n",
      "|    value_loss           | 705           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.78e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 375          |\n",
      "|    time_elapsed         | 13307        |\n",
      "|    total_timesteps      | 768000       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0009435963 |\n",
      "|    clip_fraction        | 0.00815      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.291       |\n",
      "|    explained_variance   | 0.708        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 145          |\n",
      "|    n_updates            | 3740         |\n",
      "|    policy_gradient_loss | 3.63e-05     |\n",
      "|    value_loss           | 360          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.06e+03      |\n",
      "|    ep_rew_mean          | 1.75e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 376           |\n",
      "|    time_elapsed         | 13343         |\n",
      "|    total_timesteps      | 770048        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00044188308 |\n",
      "|    clip_fraction        | 0.000146      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.286        |\n",
      "|    explained_variance   | 0.74          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 131           |\n",
      "|    n_updates            | 3750          |\n",
      "|    policy_gradient_loss | 0.000232      |\n",
      "|    value_loss           | 525           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.07e+03     |\n",
      "|    ep_rew_mean          | 1.75e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 377          |\n",
      "|    time_elapsed         | 13379        |\n",
      "|    total_timesteps      | 772096       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006856364 |\n",
      "|    clip_fraction        | 0.00571      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.268       |\n",
      "|    explained_variance   | 0.645        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 218          |\n",
      "|    n_updates            | 3760         |\n",
      "|    policy_gradient_loss | -0.000647    |\n",
      "|    value_loss           | 664          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.06e+03      |\n",
      "|    ep_rew_mean          | 1.74e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 378           |\n",
      "|    time_elapsed         | 13414         |\n",
      "|    total_timesteps      | 774144        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00097299117 |\n",
      "|    clip_fraction        | 0.0152        |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.35         |\n",
      "|    explained_variance   | 0.847         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 164           |\n",
      "|    n_updates            | 3770          |\n",
      "|    policy_gradient_loss | -0.00104      |\n",
      "|    value_loss           | 323           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.07e+03      |\n",
      "|    ep_rew_mean          | 1.75e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 379           |\n",
      "|    time_elapsed         | 13450         |\n",
      "|    total_timesteps      | 776192        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00047486075 |\n",
      "|    clip_fraction        | 0.000732      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.335        |\n",
      "|    explained_variance   | 0.857         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 170           |\n",
      "|    n_updates            | 3780          |\n",
      "|    policy_gradient_loss | -0.000657     |\n",
      "|    value_loss           | 366           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.06e+03      |\n",
      "|    ep_rew_mean          | 1.74e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 380           |\n",
      "|    time_elapsed         | 13484         |\n",
      "|    total_timesteps      | 778240        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00032116935 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.369        |\n",
      "|    explained_variance   | 0.772         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 282           |\n",
      "|    n_updates            | 3790          |\n",
      "|    policy_gradient_loss | -0.000639     |\n",
      "|    value_loss           | 391           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.07e+03     |\n",
      "|    ep_rew_mean          | 1.75e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 381          |\n",
      "|    time_elapsed         | 13521        |\n",
      "|    total_timesteps      | 780288       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014662126 |\n",
      "|    clip_fraction        | 0.0156       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.352       |\n",
      "|    explained_variance   | 0.801        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 134          |\n",
      "|    n_updates            | 3800         |\n",
      "|    policy_gradient_loss | -0.00129     |\n",
      "|    value_loss           | 485          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.06e+03     |\n",
      "|    ep_rew_mean          | 1.76e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 382          |\n",
      "|    time_elapsed         | 13554        |\n",
      "|    total_timesteps      | 782336       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011530232 |\n",
      "|    clip_fraction        | 0.0083       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.371       |\n",
      "|    explained_variance   | 0.842        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 147          |\n",
      "|    n_updates            | 3810         |\n",
      "|    policy_gradient_loss | -0.00033     |\n",
      "|    value_loss           | 306          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.06e+03     |\n",
      "|    ep_rew_mean          | 1.76e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 383          |\n",
      "|    time_elapsed         | 13589        |\n",
      "|    total_timesteps      | 784384       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0026759012 |\n",
      "|    clip_fraction        | 0.0225       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.361       |\n",
      "|    explained_variance   | 0.836        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 132          |\n",
      "|    n_updates            | 3820         |\n",
      "|    policy_gradient_loss | -0.00121     |\n",
      "|    value_loss           | 351          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.08e+03      |\n",
      "|    ep_rew_mean          | 1.77e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 384           |\n",
      "|    time_elapsed         | 13625         |\n",
      "|    total_timesteps      | 786432        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00014186196 |\n",
      "|    clip_fraction        | 0.000293      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.364        |\n",
      "|    explained_variance   | 0.827         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 157           |\n",
      "|    n_updates            | 3830          |\n",
      "|    policy_gradient_loss | -0.000163     |\n",
      "|    value_loss           | 423           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.07e+03     |\n",
      "|    ep_rew_mean          | 1.77e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 385          |\n",
      "|    time_elapsed         | 13660        |\n",
      "|    total_timesteps      | 788480       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0019518762 |\n",
      "|    clip_fraction        | 0.0042       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.366       |\n",
      "|    explained_variance   | 0.866        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 57.6         |\n",
      "|    n_updates            | 3840         |\n",
      "|    policy_gradient_loss | -0.000357    |\n",
      "|    value_loss           | 402          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.08e+03     |\n",
      "|    ep_rew_mean          | 1.79e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 386          |\n",
      "|    time_elapsed         | 13695        |\n",
      "|    total_timesteps      | 790528       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005490037 |\n",
      "|    clip_fraction        | 0.00117      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.298       |\n",
      "|    explained_variance   | 0.814        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 143          |\n",
      "|    n_updates            | 3850         |\n",
      "|    policy_gradient_loss | -0.00109     |\n",
      "|    value_loss           | 367          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.08e+03     |\n",
      "|    ep_rew_mean          | 1.79e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 387          |\n",
      "|    time_elapsed         | 13731        |\n",
      "|    total_timesteps      | 792576       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012981128 |\n",
      "|    clip_fraction        | 0.00503      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.333       |\n",
      "|    explained_variance   | 0.716        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 134          |\n",
      "|    n_updates            | 3860         |\n",
      "|    policy_gradient_loss | -0.00034     |\n",
      "|    value_loss           | 645          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.09e+03    |\n",
      "|    ep_rew_mean          | 1.79e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 388         |\n",
      "|    time_elapsed         | 13766       |\n",
      "|    total_timesteps      | 794624      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.000585414 |\n",
      "|    clip_fraction        | 0.00308     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.397      |\n",
      "|    explained_variance   | 0.883       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 66.1        |\n",
      "|    n_updates            | 3870        |\n",
      "|    policy_gradient_loss | -0.000694   |\n",
      "|    value_loss           | 326         |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.1e+03       |\n",
      "|    ep_rew_mean          | 1.79e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 389           |\n",
      "|    time_elapsed         | 13802         |\n",
      "|    total_timesteps      | 796672        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00043816632 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.395        |\n",
      "|    explained_variance   | 0.822         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 240           |\n",
      "|    n_updates            | 3880          |\n",
      "|    policy_gradient_loss | 2.11e-06      |\n",
      "|    value_loss           | 524           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.8e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 390          |\n",
      "|    time_elapsed         | 13837        |\n",
      "|    total_timesteps      | 798720       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011560476 |\n",
      "|    clip_fraction        | 0.00186      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.444       |\n",
      "|    explained_variance   | 0.929        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 55.3         |\n",
      "|    n_updates            | 3890         |\n",
      "|    policy_gradient_loss | -0.000421    |\n",
      "|    value_loss           | 289          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.09e+03      |\n",
      "|    ep_rew_mean          | 1.79e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 391           |\n",
      "|    time_elapsed         | 13873         |\n",
      "|    total_timesteps      | 800768        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00028276982 |\n",
      "|    clip_fraction        | 4.88e-05      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.329        |\n",
      "|    explained_variance   | 0.753         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 155           |\n",
      "|    n_updates            | 3900          |\n",
      "|    policy_gradient_loss | -0.00071      |\n",
      "|    value_loss           | 455           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.1e+03       |\n",
      "|    ep_rew_mean          | 1.77e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 392           |\n",
      "|    time_elapsed         | 13906         |\n",
      "|    total_timesteps      | 802816        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00022002362 |\n",
      "|    clip_fraction        | 4.88e-05      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.345        |\n",
      "|    explained_variance   | 0.823         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 107           |\n",
      "|    n_updates            | 3910          |\n",
      "|    policy_gradient_loss | -0.000199     |\n",
      "|    value_loss           | 479           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.78e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 393          |\n",
      "|    time_elapsed         | 13941        |\n",
      "|    total_timesteps      | 804864       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0015827046 |\n",
      "|    clip_fraction        | 0.00293      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.458       |\n",
      "|    explained_variance   | 0.93         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 37.5         |\n",
      "|    n_updates            | 3920         |\n",
      "|    policy_gradient_loss | -0.00135     |\n",
      "|    value_loss           | 187          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.78e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 394          |\n",
      "|    time_elapsed         | 13977        |\n",
      "|    total_timesteps      | 806912       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008926791 |\n",
      "|    clip_fraction        | 4.88e-05     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.326       |\n",
      "|    explained_variance   | 0.841        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 204          |\n",
      "|    n_updates            | 3930         |\n",
      "|    policy_gradient_loss | -0.000821    |\n",
      "|    value_loss           | 501          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.8e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 395          |\n",
      "|    time_elapsed         | 14012        |\n",
      "|    total_timesteps      | 808960       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005086516 |\n",
      "|    clip_fraction        | 0.00347      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.31        |\n",
      "|    explained_variance   | 0.574        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 259          |\n",
      "|    n_updates            | 3940         |\n",
      "|    policy_gradient_loss | 0.000825     |\n",
      "|    value_loss           | 728          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.8e+03       |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 396           |\n",
      "|    time_elapsed         | 14049         |\n",
      "|    total_timesteps      | 811008        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00049799716 |\n",
      "|    clip_fraction        | 0.000928      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.328        |\n",
      "|    explained_variance   | 0.604         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 462           |\n",
      "|    n_updates            | 3950          |\n",
      "|    policy_gradient_loss | -0.000459     |\n",
      "|    value_loss           | 632           |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.11e+03    |\n",
      "|    ep_rew_mean          | 1.81e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 397         |\n",
      "|    time_elapsed         | 14084       |\n",
      "|    total_timesteps      | 813056      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001960588 |\n",
      "|    clip_fraction        | 0.0254      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.41       |\n",
      "|    explained_variance   | 0.849       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 164         |\n",
      "|    n_updates            | 3960        |\n",
      "|    policy_gradient_loss | -0.00263    |\n",
      "|    value_loss           | 442         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.81e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 398          |\n",
      "|    time_elapsed         | 14119        |\n",
      "|    total_timesteps      | 815104       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0017867704 |\n",
      "|    clip_fraction        | 0.00498      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.36        |\n",
      "|    explained_variance   | 0.727        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 273          |\n",
      "|    n_updates            | 3970         |\n",
      "|    policy_gradient_loss | 6.48e-05     |\n",
      "|    value_loss           | 475          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.12e+03    |\n",
      "|    ep_rew_mean          | 1.82e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 399         |\n",
      "|    time_elapsed         | 14154       |\n",
      "|    total_timesteps      | 817152      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001062734 |\n",
      "|    clip_fraction        | 0.00171     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.399      |\n",
      "|    explained_variance   | 0.715       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 199         |\n",
      "|    n_updates            | 3980        |\n",
      "|    policy_gradient_loss | -0.000667   |\n",
      "|    value_loss           | 574         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.84e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 400          |\n",
      "|    time_elapsed         | 14189        |\n",
      "|    total_timesteps      | 819200       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0029823626 |\n",
      "|    clip_fraction        | 0.017        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.407       |\n",
      "|    explained_variance   | 0.876        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 74.1         |\n",
      "|    n_updates            | 3990         |\n",
      "|    policy_gradient_loss | -0.000884    |\n",
      "|    value_loss           | 305          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.83e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 401           |\n",
      "|    time_elapsed         | 14226         |\n",
      "|    total_timesteps      | 821248        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00093266054 |\n",
      "|    clip_fraction        | 0.00146       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.363        |\n",
      "|    explained_variance   | 0.829         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 131           |\n",
      "|    n_updates            | 4000          |\n",
      "|    policy_gradient_loss | -0.000113     |\n",
      "|    value_loss           | 337           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.82e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 402          |\n",
      "|    time_elapsed         | 14260        |\n",
      "|    total_timesteps      | 823296       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0021158915 |\n",
      "|    clip_fraction        | 0.00791      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.369       |\n",
      "|    explained_variance   | 0.734        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 281          |\n",
      "|    n_updates            | 4010         |\n",
      "|    policy_gradient_loss | -0.000725    |\n",
      "|    value_loss           | 520          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.84e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 403          |\n",
      "|    time_elapsed         | 14295        |\n",
      "|    total_timesteps      | 825344       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0020204857 |\n",
      "|    clip_fraction        | 0.0082       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.329       |\n",
      "|    explained_variance   | 0.777        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 145          |\n",
      "|    n_updates            | 4020         |\n",
      "|    policy_gradient_loss | -0.00136     |\n",
      "|    value_loss           | 440          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.85e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 404          |\n",
      "|    time_elapsed         | 14331        |\n",
      "|    total_timesteps      | 827392       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0015400129 |\n",
      "|    clip_fraction        | 0.0107       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.287       |\n",
      "|    explained_variance   | 0.852        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 289          |\n",
      "|    n_updates            | 4030         |\n",
      "|    policy_gradient_loss | -0.00105     |\n",
      "|    value_loss           | 406          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.12e+03      |\n",
      "|    ep_rew_mean          | 1.86e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 405           |\n",
      "|    time_elapsed         | 14366         |\n",
      "|    total_timesteps      | 829440        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 3.2352487e-05 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.263        |\n",
      "|    explained_variance   | 0.782         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 360           |\n",
      "|    n_updates            | 4040          |\n",
      "|    policy_gradient_loss | -0.000177     |\n",
      "|    value_loss           | 554           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.12e+03      |\n",
      "|    ep_rew_mean          | 1.86e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 406           |\n",
      "|    time_elapsed         | 14402         |\n",
      "|    total_timesteps      | 831488        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00070771656 |\n",
      "|    clip_fraction        | 0.00205       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.25         |\n",
      "|    explained_variance   | 0.835         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 187           |\n",
      "|    n_updates            | 4050          |\n",
      "|    policy_gradient_loss | -0.00107      |\n",
      "|    value_loss           | 486           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.87e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 407          |\n",
      "|    time_elapsed         | 14437        |\n",
      "|    total_timesteps      | 833536       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006359105 |\n",
      "|    clip_fraction        | 0.00361      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.254       |\n",
      "|    explained_variance   | 0.83         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 155          |\n",
      "|    n_updates            | 4060         |\n",
      "|    policy_gradient_loss | -0.000754    |\n",
      "|    value_loss           | 319          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.14e+03     |\n",
      "|    ep_rew_mean          | 1.88e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 408          |\n",
      "|    time_elapsed         | 14473        |\n",
      "|    total_timesteps      | 835584       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008220359 |\n",
      "|    clip_fraction        | 0.00283      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.26        |\n",
      "|    explained_variance   | 0.881        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 272          |\n",
      "|    n_updates            | 4070         |\n",
      "|    policy_gradient_loss | -0.000466    |\n",
      "|    value_loss           | 373          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.86e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 409          |\n",
      "|    time_elapsed         | 14507        |\n",
      "|    total_timesteps      | 837632       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011653814 |\n",
      "|    clip_fraction        | 0.0161       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.249       |\n",
      "|    explained_variance   | 0.856        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 72.5         |\n",
      "|    n_updates            | 4080         |\n",
      "|    policy_gradient_loss | -0.00159     |\n",
      "|    value_loss           | 367          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.87e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 410          |\n",
      "|    time_elapsed         | 14542        |\n",
      "|    total_timesteps      | 839680       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006294432 |\n",
      "|    clip_fraction        | 0.0118       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.239       |\n",
      "|    explained_variance   | 0.685        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 379          |\n",
      "|    n_updates            | 4090         |\n",
      "|    policy_gradient_loss | -0.00054     |\n",
      "|    value_loss           | 532          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.14e+03      |\n",
      "|    ep_rew_mean          | 1.89e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 411           |\n",
      "|    time_elapsed         | 14579         |\n",
      "|    total_timesteps      | 841728        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00039327797 |\n",
      "|    clip_fraction        | 0.00176       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.305        |\n",
      "|    explained_variance   | 0.895         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 100           |\n",
      "|    n_updates            | 4100          |\n",
      "|    policy_gradient_loss | -0.000738     |\n",
      "|    value_loss           | 288           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.14e+03      |\n",
      "|    ep_rew_mean          | 1.88e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 412           |\n",
      "|    time_elapsed         | 14613         |\n",
      "|    total_timesteps      | 843776        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00026893488 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.339        |\n",
      "|    explained_variance   | 0.855         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 263           |\n",
      "|    n_updates            | 4110          |\n",
      "|    policy_gradient_loss | -0.000364     |\n",
      "|    value_loss           | 507           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.85e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 413          |\n",
      "|    time_elapsed         | 14648        |\n",
      "|    total_timesteps      | 845824       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0002204729 |\n",
      "|    clip_fraction        | 0.000244     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.35        |\n",
      "|    explained_variance   | 0.891        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 123          |\n",
      "|    n_updates            | 4120         |\n",
      "|    policy_gradient_loss | -0.000169    |\n",
      "|    value_loss           | 388          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.84e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 414          |\n",
      "|    time_elapsed         | 14684        |\n",
      "|    total_timesteps      | 847872       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0009133022 |\n",
      "|    clip_fraction        | 0.0134       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.254       |\n",
      "|    explained_variance   | 0.564        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 593          |\n",
      "|    n_updates            | 4130         |\n",
      "|    policy_gradient_loss | -0.00114     |\n",
      "|    value_loss           | 772          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.12e+03    |\n",
      "|    ep_rew_mean          | 1.85e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 415         |\n",
      "|    time_elapsed         | 14719       |\n",
      "|    total_timesteps      | 849920      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.000565857 |\n",
      "|    clip_fraction        | 0.00547     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.342      |\n",
      "|    explained_variance   | 0.891       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 125         |\n",
      "|    n_updates            | 4140        |\n",
      "|    policy_gradient_loss | -0.000231   |\n",
      "|    value_loss           | 260         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.86e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 416          |\n",
      "|    time_elapsed         | 14755        |\n",
      "|    total_timesteps      | 851968       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012830612 |\n",
      "|    clip_fraction        | 0.0042       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.33        |\n",
      "|    explained_variance   | 0.863        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 225          |\n",
      "|    n_updates            | 4150         |\n",
      "|    policy_gradient_loss | -0.00045     |\n",
      "|    value_loss           | 343          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.12e+03      |\n",
      "|    ep_rew_mean          | 1.87e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 417           |\n",
      "|    time_elapsed         | 14790         |\n",
      "|    total_timesteps      | 854016        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00015651985 |\n",
      "|    clip_fraction        | 0.000684      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.27         |\n",
      "|    explained_variance   | 0.719         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 203           |\n",
      "|    n_updates            | 4160          |\n",
      "|    policy_gradient_loss | -0.000536     |\n",
      "|    value_loss           | 467           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.89e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 418          |\n",
      "|    time_elapsed         | 14826        |\n",
      "|    total_timesteps      | 856064       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0009464185 |\n",
      "|    clip_fraction        | 0.00679      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.305       |\n",
      "|    explained_variance   | 0.781        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 152          |\n",
      "|    n_updates            | 4170         |\n",
      "|    policy_gradient_loss | -0.000764    |\n",
      "|    value_loss           | 472          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.13e+03      |\n",
      "|    ep_rew_mean          | 1.88e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 419           |\n",
      "|    time_elapsed         | 14861         |\n",
      "|    total_timesteps      | 858112        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00046676112 |\n",
      "|    clip_fraction        | 0.000732      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.346        |\n",
      "|    explained_variance   | 0.713         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 460           |\n",
      "|    n_updates            | 4180          |\n",
      "|    policy_gradient_loss | -0.000476     |\n",
      "|    value_loss           | 508           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.88e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 420          |\n",
      "|    time_elapsed         | 14897        |\n",
      "|    total_timesteps      | 860160       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0015998955 |\n",
      "|    clip_fraction        | 0.0171       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.436       |\n",
      "|    explained_variance   | 0.822        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 148          |\n",
      "|    n_updates            | 4190         |\n",
      "|    policy_gradient_loss | -0.00151     |\n",
      "|    value_loss           | 500          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.14e+03     |\n",
      "|    ep_rew_mean          | 1.89e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 421          |\n",
      "|    time_elapsed         | 14932        |\n",
      "|    total_timesteps      | 862208       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0022180867 |\n",
      "|    clip_fraction        | 0.0142       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.445       |\n",
      "|    explained_variance   | 0.929        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 54.6         |\n",
      "|    n_updates            | 4200         |\n",
      "|    policy_gradient_loss | -0.00111     |\n",
      "|    value_loss           | 347          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.87e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 422          |\n",
      "|    time_elapsed         | 14967        |\n",
      "|    total_timesteps      | 864256       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0018606491 |\n",
      "|    clip_fraction        | 0.0193       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.344       |\n",
      "|    explained_variance   | 0.911        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 130          |\n",
      "|    n_updates            | 4210         |\n",
      "|    policy_gradient_loss | -0.00227     |\n",
      "|    value_loss           | 332          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.14e+03      |\n",
      "|    ep_rew_mean          | 1.87e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 423           |\n",
      "|    time_elapsed         | 15003         |\n",
      "|    total_timesteps      | 866304        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00071522914 |\n",
      "|    clip_fraction        | 0.00396       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.342        |\n",
      "|    explained_variance   | 0.851         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 583           |\n",
      "|    n_updates            | 4220          |\n",
      "|    policy_gradient_loss | -0.000711     |\n",
      "|    value_loss           | 633           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.14e+03     |\n",
      "|    ep_rew_mean          | 1.87e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 424          |\n",
      "|    time_elapsed         | 15038        |\n",
      "|    total_timesteps      | 868352       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007731721 |\n",
      "|    clip_fraction        | 0.0104       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.343       |\n",
      "|    explained_variance   | 0.699        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 251          |\n",
      "|    n_updates            | 4230         |\n",
      "|    policy_gradient_loss | -0.00127     |\n",
      "|    value_loss           | 410          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.15e+03     |\n",
      "|    ep_rew_mean          | 1.88e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 425          |\n",
      "|    time_elapsed         | 15073        |\n",
      "|    total_timesteps      | 870400       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012216798 |\n",
      "|    clip_fraction        | 0.00986      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.365       |\n",
      "|    explained_variance   | 0.657        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 436          |\n",
      "|    n_updates            | 4240         |\n",
      "|    policy_gradient_loss | -0.000856    |\n",
      "|    value_loss           | 616          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.15e+03     |\n",
      "|    ep_rew_mean          | 1.88e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 426          |\n",
      "|    time_elapsed         | 15109        |\n",
      "|    total_timesteps      | 872448       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0004623351 |\n",
      "|    clip_fraction        | 0.000244     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.425       |\n",
      "|    explained_variance   | 0.839        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 93.2         |\n",
      "|    n_updates            | 4250         |\n",
      "|    policy_gradient_loss | -0.000143    |\n",
      "|    value_loss           | 347          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 427          |\n",
      "|    time_elapsed         | 15144        |\n",
      "|    total_timesteps      | 874496       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011488396 |\n",
      "|    clip_fraction        | 0.0103       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.394       |\n",
      "|    explained_variance   | 0.894        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 187          |\n",
      "|    n_updates            | 4260         |\n",
      "|    policy_gradient_loss | -0.00118     |\n",
      "|    value_loss           | 284          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.18e+03      |\n",
      "|    ep_rew_mean          | 1.91e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 428           |\n",
      "|    time_elapsed         | 15180         |\n",
      "|    total_timesteps      | 876544        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00095531263 |\n",
      "|    clip_fraction        | 0.00317       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.455        |\n",
      "|    explained_variance   | 0.932         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 122           |\n",
      "|    n_updates            | 4270          |\n",
      "|    policy_gradient_loss | -0.00112      |\n",
      "|    value_loss           | 260           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.18e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 429          |\n",
      "|    time_elapsed         | 15215        |\n",
      "|    total_timesteps      | 878592       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0004194949 |\n",
      "|    clip_fraction        | 0.000977     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.382       |\n",
      "|    explained_variance   | 0.734        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 96.3         |\n",
      "|    n_updates            | 4280         |\n",
      "|    policy_gradient_loss | -0.00104     |\n",
      "|    value_loss           | 543          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.18e+03     |\n",
      "|    ep_rew_mean          | 1.93e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 430          |\n",
      "|    time_elapsed         | 15252        |\n",
      "|    total_timesteps      | 880640       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007457136 |\n",
      "|    clip_fraction        | 0.00762      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.365       |\n",
      "|    explained_variance   | 0.752        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 672          |\n",
      "|    n_updates            | 4290         |\n",
      "|    policy_gradient_loss | -0.000639    |\n",
      "|    value_loss           | 478          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.19e+03     |\n",
      "|    ep_rew_mean          | 1.93e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 431          |\n",
      "|    time_elapsed         | 15287        |\n",
      "|    total_timesteps      | 882688       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0023297763 |\n",
      "|    clip_fraction        | 0.0231       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.388       |\n",
      "|    explained_variance   | 0.93         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 120          |\n",
      "|    n_updates            | 4300         |\n",
      "|    policy_gradient_loss | -0.00253     |\n",
      "|    value_loss           | 265          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.18e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 432          |\n",
      "|    time_elapsed         | 15322        |\n",
      "|    total_timesteps      | 884736       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007709734 |\n",
      "|    clip_fraction        | 0.00186      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.351       |\n",
      "|    explained_variance   | 0.857        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 235          |\n",
      "|    n_updates            | 4310         |\n",
      "|    policy_gradient_loss | -0.000635    |\n",
      "|    value_loss           | 507          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.2e+03      |\n",
      "|    ep_rew_mean          | 1.93e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 433          |\n",
      "|    time_elapsed         | 15358        |\n",
      "|    total_timesteps      | 886784       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012922257 |\n",
      "|    clip_fraction        | 0.00527      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.404       |\n",
      "|    explained_variance   | 0.765        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 133          |\n",
      "|    n_updates            | 4320         |\n",
      "|    policy_gradient_loss | -0.00117     |\n",
      "|    value_loss           | 395          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.2e+03     |\n",
      "|    ep_rew_mean          | 1.93e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 434         |\n",
      "|    time_elapsed         | 15393       |\n",
      "|    total_timesteps      | 888832      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.002223869 |\n",
      "|    clip_fraction        | 0.00659     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.558      |\n",
      "|    explained_variance   | 0.928       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 117         |\n",
      "|    n_updates            | 4330        |\n",
      "|    policy_gradient_loss | -0.000919   |\n",
      "|    value_loss           | 254         |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.21e+03      |\n",
      "|    ep_rew_mean          | 1.94e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 435           |\n",
      "|    time_elapsed         | 15429         |\n",
      "|    total_timesteps      | 890880        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00090748817 |\n",
      "|    clip_fraction        | 0.00937       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.279        |\n",
      "|    explained_variance   | 0.81          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 114           |\n",
      "|    n_updates            | 4340          |\n",
      "|    policy_gradient_loss | -0.000657     |\n",
      "|    value_loss           | 453           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.21e+03     |\n",
      "|    ep_rew_mean          | 1.95e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 436          |\n",
      "|    time_elapsed         | 15464        |\n",
      "|    total_timesteps      | 892928       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008467459 |\n",
      "|    clip_fraction        | 0.0116       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.31        |\n",
      "|    explained_variance   | 0.774        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 135          |\n",
      "|    n_updates            | 4350         |\n",
      "|    policy_gradient_loss | -0.00138     |\n",
      "|    value_loss           | 490          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.21e+03      |\n",
      "|    ep_rew_mean          | 1.94e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 437           |\n",
      "|    time_elapsed         | 15499         |\n",
      "|    total_timesteps      | 894976        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00083601795 |\n",
      "|    clip_fraction        | 0.00459       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.309        |\n",
      "|    explained_variance   | 0.792         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 214           |\n",
      "|    n_updates            | 4360          |\n",
      "|    policy_gradient_loss | -0.00146      |\n",
      "|    value_loss           | 389           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.2e+03       |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 438           |\n",
      "|    time_elapsed         | 15535         |\n",
      "|    total_timesteps      | 897024        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00045319277 |\n",
      "|    clip_fraction        | 9.77e-05      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.414        |\n",
      "|    explained_variance   | 0.83          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 201           |\n",
      "|    n_updates            | 4370          |\n",
      "|    policy_gradient_loss | -0.00032      |\n",
      "|    value_loss           | 428           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.21e+03     |\n",
      "|    ep_rew_mean          | 1.94e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 439          |\n",
      "|    time_elapsed         | 15570        |\n",
      "|    total_timesteps      | 899072       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006562924 |\n",
      "|    clip_fraction        | 0.00117      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.311       |\n",
      "|    explained_variance   | 0.857        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 185          |\n",
      "|    n_updates            | 4380         |\n",
      "|    policy_gradient_loss | -0.00092     |\n",
      "|    value_loss           | 323          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.21e+03      |\n",
      "|    ep_rew_mean          | 1.93e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 440           |\n",
      "|    time_elapsed         | 15606         |\n",
      "|    total_timesteps      | 901120        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00056360266 |\n",
      "|    clip_fraction        | 0.00264       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.33         |\n",
      "|    explained_variance   | 0.798         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 143           |\n",
      "|    n_updates            | 4390          |\n",
      "|    policy_gradient_loss | -0.000186     |\n",
      "|    value_loss           | 401           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.2e+03      |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 441          |\n",
      "|    time_elapsed         | 15640        |\n",
      "|    total_timesteps      | 903168       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0001997485 |\n",
      "|    clip_fraction        | 9.77e-05     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.358       |\n",
      "|    explained_variance   | 0.893        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 208          |\n",
      "|    n_updates            | 4400         |\n",
      "|    policy_gradient_loss | -0.00049     |\n",
      "|    value_loss           | 425          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.18e+03     |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 442          |\n",
      "|    time_elapsed         | 15675        |\n",
      "|    total_timesteps      | 905216       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006484248 |\n",
      "|    clip_fraction        | 0.00444      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.325       |\n",
      "|    explained_variance   | 0.863        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 139          |\n",
      "|    n_updates            | 4410         |\n",
      "|    policy_gradient_loss | -0.000542    |\n",
      "|    value_loss           | 423          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.18e+03    |\n",
      "|    ep_rew_mean          | 1.9e+03     |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 443         |\n",
      "|    time_elapsed         | 15711       |\n",
      "|    total_timesteps      | 907264      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001155766 |\n",
      "|    clip_fraction        | 0.00552     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.364      |\n",
      "|    explained_variance   | 0.849       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 203         |\n",
      "|    n_updates            | 4420        |\n",
      "|    policy_gradient_loss | -0.000577   |\n",
      "|    value_loss           | 341         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.18e+03     |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 444          |\n",
      "|    time_elapsed         | 15746        |\n",
      "|    total_timesteps      | 909312       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005611738 |\n",
      "|    clip_fraction        | 0.00425      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.308       |\n",
      "|    explained_variance   | 0.632        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 182          |\n",
      "|    n_updates            | 4430         |\n",
      "|    policy_gradient_loss | -0.000465    |\n",
      "|    value_loss           | 553          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.18e+03     |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 445          |\n",
      "|    time_elapsed         | 15782        |\n",
      "|    total_timesteps      | 911360       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012644758 |\n",
      "|    clip_fraction        | 0.00405      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.324       |\n",
      "|    explained_variance   | 0.821        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 188          |\n",
      "|    n_updates            | 4440         |\n",
      "|    policy_gradient_loss | -0.00076     |\n",
      "|    value_loss           | 333          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.18e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 446           |\n",
      "|    time_elapsed         | 15817         |\n",
      "|    total_timesteps      | 913408        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00079940644 |\n",
      "|    clip_fraction        | 0.00376       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.293        |\n",
      "|    explained_variance   | 0.608         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 152           |\n",
      "|    n_updates            | 4450          |\n",
      "|    policy_gradient_loss | -0.000331     |\n",
      "|    value_loss           | 632           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.17e+03     |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 447          |\n",
      "|    time_elapsed         | 15853        |\n",
      "|    total_timesteps      | 915456       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007625577 |\n",
      "|    clip_fraction        | 0.00425      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.335       |\n",
      "|    explained_variance   | 0.692        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 155          |\n",
      "|    n_updates            | 4460         |\n",
      "|    policy_gradient_loss | -2.51e-05    |\n",
      "|    value_loss           | 519          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.17e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 448           |\n",
      "|    time_elapsed         | 15888         |\n",
      "|    total_timesteps      | 917504        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00078270724 |\n",
      "|    clip_fraction        | 0.00337       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.362        |\n",
      "|    explained_variance   | 0.614         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 173           |\n",
      "|    n_updates            | 4470          |\n",
      "|    policy_gradient_loss | -0.000583     |\n",
      "|    value_loss           | 715           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.16e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 449           |\n",
      "|    time_elapsed         | 15924         |\n",
      "|    total_timesteps      | 919552        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00067747105 |\n",
      "|    clip_fraction        | 0.00977       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.396        |\n",
      "|    explained_variance   | 0.832         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 262           |\n",
      "|    n_updates            | 4480          |\n",
      "|    policy_gradient_loss | -0.00107      |\n",
      "|    value_loss           | 310           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.15e+03     |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 450          |\n",
      "|    time_elapsed         | 15960        |\n",
      "|    total_timesteps      | 921600       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014553212 |\n",
      "|    clip_fraction        | 0.0152       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.401       |\n",
      "|    explained_variance   | 0.889        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 139          |\n",
      "|    n_updates            | 4490         |\n",
      "|    policy_gradient_loss | -0.00175     |\n",
      "|    value_loss           | 301          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.14e+03      |\n",
      "|    ep_rew_mean          | 1.89e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 451           |\n",
      "|    time_elapsed         | 15995         |\n",
      "|    total_timesteps      | 923648        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00029383693 |\n",
      "|    clip_fraction        | 0.000879      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.338        |\n",
      "|    explained_variance   | 0.794         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 95.4          |\n",
      "|    n_updates            | 4500          |\n",
      "|    policy_gradient_loss | -0.00038      |\n",
      "|    value_loss           | 359           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.15e+03      |\n",
      "|    ep_rew_mean          | 1.9e+03       |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 452           |\n",
      "|    time_elapsed         | 16031         |\n",
      "|    total_timesteps      | 925696        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00032100917 |\n",
      "|    clip_fraction        | 0.000391      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.389        |\n",
      "|    explained_variance   | 0.844         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 353           |\n",
      "|    n_updates            | 4510          |\n",
      "|    policy_gradient_loss | -0.000482     |\n",
      "|    value_loss           | 453           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.15e+03     |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 453          |\n",
      "|    time_elapsed         | 16067        |\n",
      "|    total_timesteps      | 927744       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010114808 |\n",
      "|    clip_fraction        | 0.0113       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.351       |\n",
      "|    explained_variance   | 0.804        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 102          |\n",
      "|    n_updates            | 4520         |\n",
      "|    policy_gradient_loss | -0.00168     |\n",
      "|    value_loss           | 371          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.15e+03     |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 454          |\n",
      "|    time_elapsed         | 16102        |\n",
      "|    total_timesteps      | 929792       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012092697 |\n",
      "|    clip_fraction        | 0.014        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.441       |\n",
      "|    explained_variance   | 0.761        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 424          |\n",
      "|    n_updates            | 4530         |\n",
      "|    policy_gradient_loss | -0.000744    |\n",
      "|    value_loss           | 619          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 455          |\n",
      "|    time_elapsed         | 16138        |\n",
      "|    total_timesteps      | 931840       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0004967282 |\n",
      "|    clip_fraction        | 0.00146      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.451       |\n",
      "|    explained_variance   | 0.863        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 66.6         |\n",
      "|    n_updates            | 4540         |\n",
      "|    policy_gradient_loss | -0.000554    |\n",
      "|    value_loss           | 284          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.17e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 456          |\n",
      "|    time_elapsed         | 16173        |\n",
      "|    total_timesteps      | 933888       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006321134 |\n",
      "|    clip_fraction        | 0.00386      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.506       |\n",
      "|    explained_variance   | 0.774        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 219          |\n",
      "|    n_updates            | 4550         |\n",
      "|    policy_gradient_loss | -0.000325    |\n",
      "|    value_loss           | 526          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.16e+03      |\n",
      "|    ep_rew_mean          | 1.9e+03       |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 457           |\n",
      "|    time_elapsed         | 16208         |\n",
      "|    total_timesteps      | 935936        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00031660293 |\n",
      "|    clip_fraction        | 0.000391      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.434        |\n",
      "|    explained_variance   | 0.825         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 199           |\n",
      "|    n_updates            | 4560          |\n",
      "|    policy_gradient_loss | -0.000322     |\n",
      "|    value_loss           | 521           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.15e+03      |\n",
      "|    ep_rew_mean          | 1.89e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 458           |\n",
      "|    time_elapsed         | 16244         |\n",
      "|    total_timesteps      | 937984        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00032783076 |\n",
      "|    clip_fraction        | 0.00103       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.426        |\n",
      "|    explained_variance   | 0.785         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 230           |\n",
      "|    n_updates            | 4570          |\n",
      "|    policy_gradient_loss | -0.000481     |\n",
      "|    value_loss           | 478           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.15e+03     |\n",
      "|    ep_rew_mean          | 1.89e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 459          |\n",
      "|    time_elapsed         | 16280        |\n",
      "|    total_timesteps      | 940032       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012703175 |\n",
      "|    clip_fraction        | 0.0126       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.474       |\n",
      "|    explained_variance   | 0.779        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 267          |\n",
      "|    n_updates            | 4580         |\n",
      "|    policy_gradient_loss | -0.00158     |\n",
      "|    value_loss           | 549          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.16e+03      |\n",
      "|    ep_rew_mean          | 1.89e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 460           |\n",
      "|    time_elapsed         | 16316         |\n",
      "|    total_timesteps      | 942080        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00036724436 |\n",
      "|    clip_fraction        | 0.000684      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.47         |\n",
      "|    explained_variance   | 0.805         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 202           |\n",
      "|    n_updates            | 4590          |\n",
      "|    policy_gradient_loss | -0.000116     |\n",
      "|    value_loss           | 418           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.17e+03     |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 461          |\n",
      "|    time_elapsed         | 16351        |\n",
      "|    total_timesteps      | 944128       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0020128028 |\n",
      "|    clip_fraction        | 0.0163       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.508       |\n",
      "|    explained_variance   | 0.822        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 130          |\n",
      "|    n_updates            | 4600         |\n",
      "|    policy_gradient_loss | -0.00102     |\n",
      "|    value_loss           | 339          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.19e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 462          |\n",
      "|    time_elapsed         | 16387        |\n",
      "|    total_timesteps      | 946176       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010656641 |\n",
      "|    clip_fraction        | 0.00723      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.547       |\n",
      "|    explained_variance   | 0.891        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 131          |\n",
      "|    n_updates            | 4610         |\n",
      "|    policy_gradient_loss | -0.00128     |\n",
      "|    value_loss           | 352          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.19e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 463          |\n",
      "|    time_elapsed         | 16422        |\n",
      "|    total_timesteps      | 948224       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0038697282 |\n",
      "|    clip_fraction        | 0.0382       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.48        |\n",
      "|    explained_variance   | 0.902        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 80.2         |\n",
      "|    n_updates            | 4620         |\n",
      "|    policy_gradient_loss | -0.00332     |\n",
      "|    value_loss           | 408          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.19e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 464          |\n",
      "|    time_elapsed         | 16458        |\n",
      "|    total_timesteps      | 950272       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007735933 |\n",
      "|    clip_fraction        | 0.0112       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.429       |\n",
      "|    explained_variance   | 0.704        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 373          |\n",
      "|    n_updates            | 4630         |\n",
      "|    policy_gradient_loss | -0.00082     |\n",
      "|    value_loss           | 562          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.2e+03      |\n",
      "|    ep_rew_mean          | 1.94e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 465          |\n",
      "|    time_elapsed         | 16494        |\n",
      "|    total_timesteps      | 952320       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0025670957 |\n",
      "|    clip_fraction        | 0.00889      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.448       |\n",
      "|    explained_variance   | 0.874        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 233          |\n",
      "|    n_updates            | 4640         |\n",
      "|    policy_gradient_loss | -0.00134     |\n",
      "|    value_loss           | 399          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.21e+03     |\n",
      "|    ep_rew_mean          | 1.95e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 466          |\n",
      "|    time_elapsed         | 16528        |\n",
      "|    total_timesteps      | 954368       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007538949 |\n",
      "|    clip_fraction        | 0.00122      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.42        |\n",
      "|    explained_variance   | 0.916        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 93.7         |\n",
      "|    n_updates            | 4650         |\n",
      "|    policy_gradient_loss | -0.00121     |\n",
      "|    value_loss           | 281          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.21e+03      |\n",
      "|    ep_rew_mean          | 1.94e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 467           |\n",
      "|    time_elapsed         | 16563         |\n",
      "|    total_timesteps      | 956416        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00039533924 |\n",
      "|    clip_fraction        | 0.000488      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.388        |\n",
      "|    explained_variance   | 0.757         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 428           |\n",
      "|    n_updates            | 4660          |\n",
      "|    policy_gradient_loss | 0.000136      |\n",
      "|    value_loss           | 530           |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.22e+03    |\n",
      "|    ep_rew_mean          | 1.95e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 468         |\n",
      "|    time_elapsed         | 16599       |\n",
      "|    total_timesteps      | 958464      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.002141882 |\n",
      "|    clip_fraction        | 0.0317      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.445      |\n",
      "|    explained_variance   | 0.855       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 182         |\n",
      "|    n_updates            | 4670        |\n",
      "|    policy_gradient_loss | -0.00252    |\n",
      "|    value_loss           | 485         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.23e+03     |\n",
      "|    ep_rew_mean          | 1.96e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 469          |\n",
      "|    time_elapsed         | 16634        |\n",
      "|    total_timesteps      | 960512       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008175039 |\n",
      "|    clip_fraction        | 0.000293     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.445       |\n",
      "|    explained_variance   | 0.836        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 178          |\n",
      "|    n_updates            | 4680         |\n",
      "|    policy_gradient_loss | -0.000136    |\n",
      "|    value_loss           | 508          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.24e+03     |\n",
      "|    ep_rew_mean          | 1.96e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 470          |\n",
      "|    time_elapsed         | 16670        |\n",
      "|    total_timesteps      | 962560       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0013683055 |\n",
      "|    clip_fraction        | 0.00386      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.408       |\n",
      "|    explained_variance   | 0.878        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 63.6         |\n",
      "|    n_updates            | 4690         |\n",
      "|    policy_gradient_loss | -0.000276    |\n",
      "|    value_loss           | 354          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.24e+03      |\n",
      "|    ep_rew_mean          | 1.96e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 471           |\n",
      "|    time_elapsed         | 16705         |\n",
      "|    total_timesteps      | 964608        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00039275462 |\n",
      "|    clip_fraction        | 0.00264       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.415        |\n",
      "|    explained_variance   | 0.824         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 365           |\n",
      "|    n_updates            | 4700          |\n",
      "|    policy_gradient_loss | -0.000811     |\n",
      "|    value_loss           | 542           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.25e+03     |\n",
      "|    ep_rew_mean          | 1.96e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 472          |\n",
      "|    time_elapsed         | 16741        |\n",
      "|    total_timesteps      | 966656       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007346139 |\n",
      "|    clip_fraction        | 0.002        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.452       |\n",
      "|    explained_variance   | 0.886        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 129          |\n",
      "|    n_updates            | 4710         |\n",
      "|    policy_gradient_loss | -7.15e-05    |\n",
      "|    value_loss           | 248          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.24e+03     |\n",
      "|    ep_rew_mean          | 1.96e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 473          |\n",
      "|    time_elapsed         | 16777        |\n",
      "|    total_timesteps      | 968704       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0015305313 |\n",
      "|    clip_fraction        | 0.00278      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.396       |\n",
      "|    explained_variance   | 0.783        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 290          |\n",
      "|    n_updates            | 4720         |\n",
      "|    policy_gradient_loss | -0.000641    |\n",
      "|    value_loss           | 598          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.24e+03     |\n",
      "|    ep_rew_mean          | 1.97e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 474          |\n",
      "|    time_elapsed         | 16812        |\n",
      "|    total_timesteps      | 970752       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0015954184 |\n",
      "|    clip_fraction        | 0.0179       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.276       |\n",
      "|    explained_variance   | 0.71         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 189          |\n",
      "|    n_updates            | 4730         |\n",
      "|    policy_gradient_loss | -0.00129     |\n",
      "|    value_loss           | 519          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.26e+03     |\n",
      "|    ep_rew_mean          | 1.99e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 475          |\n",
      "|    time_elapsed         | 16847        |\n",
      "|    total_timesteps      | 972800       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0019290061 |\n",
      "|    clip_fraction        | 0.00952      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.267       |\n",
      "|    explained_variance   | 0.729        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 369          |\n",
      "|    n_updates            | 4740         |\n",
      "|    policy_gradient_loss | -0.00108     |\n",
      "|    value_loss           | 645          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.24e+03      |\n",
      "|    ep_rew_mean          | 1.96e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 476           |\n",
      "|    time_elapsed         | 16883         |\n",
      "|    total_timesteps      | 974848        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00010097725 |\n",
      "|    clip_fraction        | 0.000195      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.235        |\n",
      "|    explained_variance   | 0.865         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 292           |\n",
      "|    n_updates            | 4750          |\n",
      "|    policy_gradient_loss | -0.000242     |\n",
      "|    value_loss           | 316           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.24e+03      |\n",
      "|    ep_rew_mean          | 1.96e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 477           |\n",
      "|    time_elapsed         | 16918         |\n",
      "|    total_timesteps      | 976896        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00017908629 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.211        |\n",
      "|    explained_variance   | 0.751         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 157           |\n",
      "|    n_updates            | 4760          |\n",
      "|    policy_gradient_loss | -6.53e-05     |\n",
      "|    value_loss           | 501           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.23e+03     |\n",
      "|    ep_rew_mean          | 1.94e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 478          |\n",
      "|    time_elapsed         | 16954        |\n",
      "|    total_timesteps      | 978944       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014845901 |\n",
      "|    clip_fraction        | 0.0123       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.232       |\n",
      "|    explained_variance   | 0.819        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 208          |\n",
      "|    n_updates            | 4770         |\n",
      "|    policy_gradient_loss | -0.00112     |\n",
      "|    value_loss           | 503          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.22e+03     |\n",
      "|    ep_rew_mean          | 1.93e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 479          |\n",
      "|    time_elapsed         | 16990        |\n",
      "|    total_timesteps      | 980992       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0013013207 |\n",
      "|    clip_fraction        | 0.00991      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.189       |\n",
      "|    explained_variance   | 0.747        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 321          |\n",
      "|    n_updates            | 4780         |\n",
      "|    policy_gradient_loss | -0.000796    |\n",
      "|    value_loss           | 551          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.23e+03     |\n",
      "|    ep_rew_mean          | 1.94e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 480          |\n",
      "|    time_elapsed         | 17025        |\n",
      "|    total_timesteps      | 983040       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008558929 |\n",
      "|    clip_fraction        | 0.00908      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.173       |\n",
      "|    explained_variance   | 0.764        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 73.2         |\n",
      "|    n_updates            | 4790         |\n",
      "|    policy_gradient_loss | -0.00117     |\n",
      "|    value_loss           | 453          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.22e+03      |\n",
      "|    ep_rew_mean          | 1.94e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 481           |\n",
      "|    time_elapsed         | 17061         |\n",
      "|    total_timesteps      | 985088        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00016433813 |\n",
      "|    clip_fraction        | 0.000146      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.227        |\n",
      "|    explained_variance   | 0.865         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 165           |\n",
      "|    n_updates            | 4800          |\n",
      "|    policy_gradient_loss | -0.000262     |\n",
      "|    value_loss           | 300           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.21e+03     |\n",
      "|    ep_rew_mean          | 1.93e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 482          |\n",
      "|    time_elapsed         | 17097        |\n",
      "|    total_timesteps      | 987136       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008517625 |\n",
      "|    clip_fraction        | 0.00337      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.212       |\n",
      "|    explained_variance   | 0.695        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 140          |\n",
      "|    n_updates            | 4810         |\n",
      "|    policy_gradient_loss | -0.000183    |\n",
      "|    value_loss           | 643          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.21e+03      |\n",
      "|    ep_rew_mean          | 1.93e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 483           |\n",
      "|    time_elapsed         | 17132         |\n",
      "|    total_timesteps      | 989184        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00045649539 |\n",
      "|    clip_fraction        | 0.00293       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.267        |\n",
      "|    explained_variance   | 0.613         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 174           |\n",
      "|    n_updates            | 4820          |\n",
      "|    policy_gradient_loss | -0.000713     |\n",
      "|    value_loss           | 533           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.21e+03      |\n",
      "|    ep_rew_mean          | 1.94e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 484           |\n",
      "|    time_elapsed         | 17167         |\n",
      "|    total_timesteps      | 991232        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00029745986 |\n",
      "|    clip_fraction        | 0.00132       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.258        |\n",
      "|    explained_variance   | 0.737         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 117           |\n",
      "|    n_updates            | 4830          |\n",
      "|    policy_gradient_loss | -0.000584     |\n",
      "|    value_loss           | 443           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.22e+03      |\n",
      "|    ep_rew_mean          | 1.95e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 485           |\n",
      "|    time_elapsed         | 17203         |\n",
      "|    total_timesteps      | 993280        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00063989265 |\n",
      "|    clip_fraction        | 0.000879      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.262        |\n",
      "|    explained_variance   | 0.808         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 169           |\n",
      "|    n_updates            | 4840          |\n",
      "|    policy_gradient_loss | -0.000655     |\n",
      "|    value_loss           | 396           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.2e+03       |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 486           |\n",
      "|    time_elapsed         | 17238         |\n",
      "|    total_timesteps      | 995328        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00035092895 |\n",
      "|    clip_fraction        | 0.00332       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.254        |\n",
      "|    explained_variance   | 0.8           |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 85.3          |\n",
      "|    n_updates            | 4850          |\n",
      "|    policy_gradient_loss | -0.000781     |\n",
      "|    value_loss           | 400           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.2e+03      |\n",
      "|    ep_rew_mean          | 1.93e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 487          |\n",
      "|    time_elapsed         | 17274        |\n",
      "|    total_timesteps      | 997376       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0018815999 |\n",
      "|    clip_fraction        | 0.025        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.295       |\n",
      "|    explained_variance   | 0.772        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 189          |\n",
      "|    n_updates            | 4860         |\n",
      "|    policy_gradient_loss | -0.00275     |\n",
      "|    value_loss           | 581          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.21e+03      |\n",
      "|    ep_rew_mean          | 1.94e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 488           |\n",
      "|    time_elapsed         | 17309         |\n",
      "|    total_timesteps      | 999424        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00025210695 |\n",
      "|    clip_fraction        | 0.000195      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.322        |\n",
      "|    explained_variance   | 0.854         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 189           |\n",
      "|    n_updates            | 4870          |\n",
      "|    policy_gradient_loss | -0.000605     |\n",
      "|    value_loss           | 346           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.2e+03       |\n",
      "|    ep_rew_mean          | 1.94e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 489           |\n",
      "|    time_elapsed         | 17346         |\n",
      "|    total_timesteps      | 1001472       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00012626837 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.288        |\n",
      "|    explained_variance   | 0.736         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 242           |\n",
      "|    n_updates            | 4880          |\n",
      "|    policy_gradient_loss | -0.000258     |\n",
      "|    value_loss           | 502           |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.21e+03    |\n",
      "|    ep_rew_mean          | 1.94e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 490         |\n",
      "|    time_elapsed         | 17381       |\n",
      "|    total_timesteps      | 1003520     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001474967 |\n",
      "|    clip_fraction        | 0.00664     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.266      |\n",
      "|    explained_variance   | 0.728       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 122         |\n",
      "|    n_updates            | 4890        |\n",
      "|    policy_gradient_loss | -0.000598   |\n",
      "|    value_loss           | 539         |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.21e+03      |\n",
      "|    ep_rew_mean          | 1.95e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 491           |\n",
      "|    time_elapsed         | 17417         |\n",
      "|    total_timesteps      | 1005568       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00075400446 |\n",
      "|    clip_fraction        | 0.000781      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.262        |\n",
      "|    explained_variance   | 0.823         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 409           |\n",
      "|    n_updates            | 4900          |\n",
      "|    policy_gradient_loss | -0.000859     |\n",
      "|    value_loss           | 405           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.19e+03     |\n",
      "|    ep_rew_mean          | 1.94e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 492          |\n",
      "|    time_elapsed         | 17453        |\n",
      "|    total_timesteps      | 1007616      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012113393 |\n",
      "|    clip_fraction        | 0.016        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.24        |\n",
      "|    explained_variance   | 0.802        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 177          |\n",
      "|    n_updates            | 4910         |\n",
      "|    policy_gradient_loss | -0.00118     |\n",
      "|    value_loss           | 427          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.18e+03     |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 493          |\n",
      "|    time_elapsed         | 17489        |\n",
      "|    total_timesteps      | 1009664      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0009750653 |\n",
      "|    clip_fraction        | 0.00107      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.287       |\n",
      "|    explained_variance   | 0.83         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 194          |\n",
      "|    n_updates            | 4920         |\n",
      "|    policy_gradient_loss | -0.000871    |\n",
      "|    value_loss           | 464          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.2e+03       |\n",
      "|    ep_rew_mean          | 1.94e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 494           |\n",
      "|    time_elapsed         | 17524         |\n",
      "|    total_timesteps      | 1011712       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00027614826 |\n",
      "|    clip_fraction        | 0.00239       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.288        |\n",
      "|    explained_variance   | 0.889         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 53.8          |\n",
      "|    n_updates            | 4930          |\n",
      "|    policy_gradient_loss | -0.000608     |\n",
      "|    value_loss           | 410           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.19e+03     |\n",
      "|    ep_rew_mean          | 1.93e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 495          |\n",
      "|    time_elapsed         | 17560        |\n",
      "|    total_timesteps      | 1013760      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005393466 |\n",
      "|    clip_fraction        | 0.00117      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.216       |\n",
      "|    explained_variance   | 0.85         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 87.4         |\n",
      "|    n_updates            | 4940         |\n",
      "|    policy_gradient_loss | -6.66e-05    |\n",
      "|    value_loss           | 307          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.18e+03      |\n",
      "|    ep_rew_mean          | 1.93e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 496           |\n",
      "|    time_elapsed         | 17595         |\n",
      "|    total_timesteps      | 1015808       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00043632896 |\n",
      "|    clip_fraction        | 0.00254       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.214        |\n",
      "|    explained_variance   | 0.783         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 135           |\n",
      "|    n_updates            | 4950          |\n",
      "|    policy_gradient_loss | -0.000756     |\n",
      "|    value_loss           | 520           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.17e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 497           |\n",
      "|    time_elapsed         | 17631         |\n",
      "|    total_timesteps      | 1017856       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00086197787 |\n",
      "|    clip_fraction        | 0.00679       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.233        |\n",
      "|    explained_variance   | 0.788         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 163           |\n",
      "|    n_updates            | 4960          |\n",
      "|    policy_gradient_loss | -0.000831     |\n",
      "|    value_loss           | 504           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.18e+03      |\n",
      "|    ep_rew_mean          | 1.95e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 498           |\n",
      "|    time_elapsed         | 17667         |\n",
      "|    total_timesteps      | 1019904       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00019284489 |\n",
      "|    clip_fraction        | 0.000293      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.199        |\n",
      "|    explained_variance   | 0.79          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 110           |\n",
      "|    n_updates            | 4970          |\n",
      "|    policy_gradient_loss | -0.000361     |\n",
      "|    value_loss           | 341           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.17e+03      |\n",
      "|    ep_rew_mean          | 1.93e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 499           |\n",
      "|    time_elapsed         | 17704         |\n",
      "|    total_timesteps      | 1021952       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00031721784 |\n",
      "|    clip_fraction        | 0.00205       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.212        |\n",
      "|    explained_variance   | 0.678         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 472           |\n",
      "|    n_updates            | 4980          |\n",
      "|    policy_gradient_loss | -0.000498     |\n",
      "|    value_loss           | 604           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.17e+03     |\n",
      "|    ep_rew_mean          | 1.93e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 500          |\n",
      "|    time_elapsed         | 17738        |\n",
      "|    total_timesteps      | 1024000      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006235817 |\n",
      "|    clip_fraction        | 0.00498      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.231       |\n",
      "|    explained_variance   | 0.613        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 165          |\n",
      "|    n_updates            | 4990         |\n",
      "|    policy_gradient_loss | -0.000583    |\n",
      "|    value_loss           | 680          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.16e+03      |\n",
      "|    ep_rew_mean          | 1.91e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 501           |\n",
      "|    time_elapsed         | 17774         |\n",
      "|    total_timesteps      | 1026048       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00067639526 |\n",
      "|    clip_fraction        | 0.00962       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.283        |\n",
      "|    explained_variance   | 0.77          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 215           |\n",
      "|    n_updates            | 5000          |\n",
      "|    policy_gradient_loss | -0.00178      |\n",
      "|    value_loss           | 432           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.16e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 502           |\n",
      "|    time_elapsed         | 17809         |\n",
      "|    total_timesteps      | 1028096       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00095343904 |\n",
      "|    clip_fraction        | 0.0162        |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.318        |\n",
      "|    explained_variance   | 0.652         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 515           |\n",
      "|    n_updates            | 5010          |\n",
      "|    policy_gradient_loss | -0.00106      |\n",
      "|    value_loss           | 599           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.16e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 503           |\n",
      "|    time_elapsed         | 17844         |\n",
      "|    total_timesteps      | 1030144       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00039875574 |\n",
      "|    clip_fraction        | 0.00122       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.316        |\n",
      "|    explained_variance   | 0.834         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 250           |\n",
      "|    n_updates            | 5020          |\n",
      "|    policy_gradient_loss | -0.000449     |\n",
      "|    value_loss           | 287           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.16e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 504           |\n",
      "|    time_elapsed         | 17880         |\n",
      "|    total_timesteps      | 1032192       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00023949324 |\n",
      "|    clip_fraction        | 0.000537      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.309        |\n",
      "|    explained_variance   | 0.706         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 201           |\n",
      "|    n_updates            | 5030          |\n",
      "|    policy_gradient_loss | -0.000429     |\n",
      "|    value_loss           | 516           |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.17e+03    |\n",
      "|    ep_rew_mean          | 1.92e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 505         |\n",
      "|    time_elapsed         | 17915       |\n",
      "|    total_timesteps      | 1034240     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.000639322 |\n",
      "|    clip_fraction        | 0.000977    |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.314      |\n",
      "|    explained_variance   | 0.621       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 211         |\n",
      "|    n_updates            | 5040        |\n",
      "|    policy_gradient_loss | -0.000676   |\n",
      "|    value_loss           | 746         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.16e+03    |\n",
      "|    ep_rew_mean          | 1.93e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 506         |\n",
      "|    time_elapsed         | 17951       |\n",
      "|    total_timesteps      | 1036288     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.002089482 |\n",
      "|    clip_fraction        | 0.00933     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.359      |\n",
      "|    explained_variance   | 0.769       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 222         |\n",
      "|    n_updates            | 5050        |\n",
      "|    policy_gradient_loss | -0.00135    |\n",
      "|    value_loss           | 436         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 507          |\n",
      "|    time_elapsed         | 17986        |\n",
      "|    total_timesteps      | 1038336      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014743035 |\n",
      "|    clip_fraction        | 0.0112       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.337       |\n",
      "|    explained_variance   | 0.723        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 137          |\n",
      "|    n_updates            | 5060         |\n",
      "|    policy_gradient_loss | -0.000708    |\n",
      "|    value_loss           | 596          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.16e+03      |\n",
      "|    ep_rew_mean          | 1.93e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 508           |\n",
      "|    time_elapsed         | 18024         |\n",
      "|    total_timesteps      | 1040384       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00029876686 |\n",
      "|    clip_fraction        | 0.000244      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.29         |\n",
      "|    explained_variance   | 0.742         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 353           |\n",
      "|    n_updates            | 5070          |\n",
      "|    policy_gradient_loss | -0.000382     |\n",
      "|    value_loss           | 396           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 509          |\n",
      "|    time_elapsed         | 18059        |\n",
      "|    total_timesteps      | 1042432      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006028134 |\n",
      "|    clip_fraction        | 0.00776      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.338       |\n",
      "|    explained_variance   | 0.752        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 133          |\n",
      "|    n_updates            | 5080         |\n",
      "|    policy_gradient_loss | -0.0013      |\n",
      "|    value_loss           | 494          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.15e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 510           |\n",
      "|    time_elapsed         | 18096         |\n",
      "|    total_timesteps      | 1044480       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00021092442 |\n",
      "|    clip_fraction        | 4.88e-05      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.309        |\n",
      "|    explained_variance   | 0.734         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 214           |\n",
      "|    n_updates            | 5090          |\n",
      "|    policy_gradient_loss | -0.000427     |\n",
      "|    value_loss           | 501           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.89e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 511          |\n",
      "|    time_elapsed         | 18131        |\n",
      "|    total_timesteps      | 1046528      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010050032 |\n",
      "|    clip_fraction        | 0.0119       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.275       |\n",
      "|    explained_variance   | 0.792        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 244          |\n",
      "|    n_updates            | 5100         |\n",
      "|    policy_gradient_loss | -0.00191     |\n",
      "|    value_loss           | 452          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.14e+03     |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 512          |\n",
      "|    time_elapsed         | 18167        |\n",
      "|    total_timesteps      | 1048576      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008097823 |\n",
      "|    clip_fraction        | 0.00918      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.297       |\n",
      "|    explained_variance   | 0.692        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 567          |\n",
      "|    n_updates            | 5110         |\n",
      "|    policy_gradient_loss | -0.000443    |\n",
      "|    value_loss           | 720          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.14e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 513          |\n",
      "|    time_elapsed         | 18202        |\n",
      "|    total_timesteps      | 1050624      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005523423 |\n",
      "|    clip_fraction        | 0.00479      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.327       |\n",
      "|    explained_variance   | 0.66         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 143          |\n",
      "|    n_updates            | 5120         |\n",
      "|    policy_gradient_loss | -0.000288    |\n",
      "|    value_loss           | 411          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.15e+03      |\n",
      "|    ep_rew_mean          | 1.94e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 514           |\n",
      "|    time_elapsed         | 18238         |\n",
      "|    total_timesteps      | 1052672       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00022784428 |\n",
      "|    clip_fraction        | 0.000244      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.342        |\n",
      "|    explained_variance   | 0.69          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 390           |\n",
      "|    n_updates            | 5130          |\n",
      "|    policy_gradient_loss | -0.000558     |\n",
      "|    value_loss           | 419           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 1.95e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 515          |\n",
      "|    time_elapsed         | 18273        |\n",
      "|    total_timesteps      | 1054720      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0004966375 |\n",
      "|    clip_fraction        | 4.88e-05     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.396       |\n",
      "|    explained_variance   | 0.743        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 434          |\n",
      "|    n_updates            | 5140         |\n",
      "|    policy_gradient_loss | -0.000588    |\n",
      "|    value_loss           | 548          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.12e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 516           |\n",
      "|    time_elapsed         | 18309         |\n",
      "|    total_timesteps      | 1056768       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00094528205 |\n",
      "|    clip_fraction        | 0.004         |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.323        |\n",
      "|    explained_variance   | 0.88          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 113           |\n",
      "|    n_updates            | 5150          |\n",
      "|    policy_gradient_loss | -0.00036      |\n",
      "|    value_loss           | 272           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 517          |\n",
      "|    time_elapsed         | 18344        |\n",
      "|    total_timesteps      | 1058816      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0009462673 |\n",
      "|    clip_fraction        | 0.0123       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.296       |\n",
      "|    explained_variance   | 0.512        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 298          |\n",
      "|    n_updates            | 5160         |\n",
      "|    policy_gradient_loss | -0.000552    |\n",
      "|    value_loss           | 745          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.93e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 518          |\n",
      "|    time_elapsed         | 18380        |\n",
      "|    total_timesteps      | 1060864      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0003648148 |\n",
      "|    clip_fraction        | 0.00112      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.337       |\n",
      "|    explained_variance   | 0.753        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 101          |\n",
      "|    n_updates            | 5170         |\n",
      "|    policy_gradient_loss | -0.0003      |\n",
      "|    value_loss           | 334          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 519          |\n",
      "|    time_elapsed         | 18415        |\n",
      "|    total_timesteps      | 1062912      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0017431239 |\n",
      "|    clip_fraction        | 0.0113       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.352       |\n",
      "|    explained_variance   | 0.643        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 110          |\n",
      "|    n_updates            | 5180         |\n",
      "|    policy_gradient_loss | -0.00166     |\n",
      "|    value_loss           | 590          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.9e+03       |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 520           |\n",
      "|    time_elapsed         | 18450         |\n",
      "|    total_timesteps      | 1064960       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00026878947 |\n",
      "|    clip_fraction        | 0.000342      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.339        |\n",
      "|    explained_variance   | 0.82          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 103           |\n",
      "|    n_updates            | 5190          |\n",
      "|    policy_gradient_loss | -5.5e-05      |\n",
      "|    value_loss           | 403           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 521          |\n",
      "|    time_elapsed         | 18485        |\n",
      "|    total_timesteps      | 1067008      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008249299 |\n",
      "|    clip_fraction        | 0.0132       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.383       |\n",
      "|    explained_variance   | 0.817        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 162          |\n",
      "|    n_updates            | 5200         |\n",
      "|    policy_gradient_loss | -0.00191     |\n",
      "|    value_loss           | 523          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.08e+03      |\n",
      "|    ep_rew_mean          | 1.87e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 522           |\n",
      "|    time_elapsed         | 18521         |\n",
      "|    total_timesteps      | 1069056       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00012947261 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.373        |\n",
      "|    explained_variance   | 0.8           |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 251           |\n",
      "|    n_updates            | 5210          |\n",
      "|    policy_gradient_loss | -0.000241     |\n",
      "|    value_loss           | 599           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.08e+03     |\n",
      "|    ep_rew_mean          | 1.87e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 523          |\n",
      "|    time_elapsed         | 18556        |\n",
      "|    total_timesteps      | 1071104      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007074261 |\n",
      "|    clip_fraction        | 0.00117      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.304       |\n",
      "|    explained_variance   | 0.705        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 343          |\n",
      "|    n_updates            | 5220         |\n",
      "|    policy_gradient_loss | -0.000371    |\n",
      "|    value_loss           | 585          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.06e+03     |\n",
      "|    ep_rew_mean          | 1.86e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 524          |\n",
      "|    time_elapsed         | 18592        |\n",
      "|    total_timesteps      | 1073152      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007942526 |\n",
      "|    clip_fraction        | 0.00708      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.296       |\n",
      "|    explained_variance   | 0.822        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 402          |\n",
      "|    n_updates            | 5230         |\n",
      "|    policy_gradient_loss | -0.000481    |\n",
      "|    value_loss           | 339          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.06e+03      |\n",
      "|    ep_rew_mean          | 1.85e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 525           |\n",
      "|    time_elapsed         | 18627         |\n",
      "|    total_timesteps      | 1075200       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00097230857 |\n",
      "|    clip_fraction        | 0.00508       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.352        |\n",
      "|    explained_variance   | 0.769         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 183           |\n",
      "|    n_updates            | 5240          |\n",
      "|    policy_gradient_loss | -0.000897     |\n",
      "|    value_loss           | 514           |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.07e+03    |\n",
      "|    ep_rew_mean          | 1.87e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 526         |\n",
      "|    time_elapsed         | 18662       |\n",
      "|    total_timesteps      | 1077248     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.000899532 |\n",
      "|    clip_fraction        | 0.00879     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.386      |\n",
      "|    explained_variance   | 0.757       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 362         |\n",
      "|    n_updates            | 5250        |\n",
      "|    policy_gradient_loss | -0.00109    |\n",
      "|    value_loss           | 580         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.05e+03     |\n",
      "|    ep_rew_mean          | 1.85e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 527          |\n",
      "|    time_elapsed         | 18697        |\n",
      "|    total_timesteps      | 1079296      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0013362137 |\n",
      "|    clip_fraction        | 0.00864      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.404       |\n",
      "|    explained_variance   | 0.838        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 161          |\n",
      "|    n_updates            | 5260         |\n",
      "|    policy_gradient_loss | -0.00139     |\n",
      "|    value_loss           | 281          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.06e+03      |\n",
      "|    ep_rew_mean          | 1.85e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 528           |\n",
      "|    time_elapsed         | 18734         |\n",
      "|    total_timesteps      | 1081344       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00022944919 |\n",
      "|    clip_fraction        | 0.000244      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.44         |\n",
      "|    explained_variance   | 0.757         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 195           |\n",
      "|    n_updates            | 5270          |\n",
      "|    policy_gradient_loss | -0.000319     |\n",
      "|    value_loss           | 633           |\n",
      "-------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 1.08e+03   |\n",
      "|    ep_rew_mean          | 1.87e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 57         |\n",
      "|    iterations           | 529        |\n",
      "|    time_elapsed         | 18767      |\n",
      "|    total_timesteps      | 1083392    |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.00095289 |\n",
      "|    clip_fraction        | 0.00356    |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -0.414     |\n",
      "|    explained_variance   | 0.873      |\n",
      "|    learning_rate        | 1e-06      |\n",
      "|    loss                 | 375        |\n",
      "|    n_updates            | 5280       |\n",
      "|    policy_gradient_loss | -0.000576  |\n",
      "|    value_loss           | 362        |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.08e+03     |\n",
      "|    ep_rew_mean          | 1.88e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 530          |\n",
      "|    time_elapsed         | 18803        |\n",
      "|    total_timesteps      | 1085440      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0015696955 |\n",
      "|    clip_fraction        | 0.00488      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.427       |\n",
      "|    explained_variance   | 0.874        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 130          |\n",
      "|    n_updates            | 5290         |\n",
      "|    policy_gradient_loss | -0.00131     |\n",
      "|    value_loss           | 349          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.09e+03      |\n",
      "|    ep_rew_mean          | 1.88e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 531           |\n",
      "|    time_elapsed         | 18838         |\n",
      "|    total_timesteps      | 1087488       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00025963897 |\n",
      "|    clip_fraction        | 0.000244      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.407        |\n",
      "|    explained_variance   | 0.837         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 143           |\n",
      "|    n_updates            | 5300          |\n",
      "|    policy_gradient_loss | -0.000112     |\n",
      "|    value_loss           | 576           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 532          |\n",
      "|    time_elapsed         | 18873        |\n",
      "|    total_timesteps      | 1089536      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0017526704 |\n",
      "|    clip_fraction        | 0.00903      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.37        |\n",
      "|    explained_variance   | 0.859        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 77.8         |\n",
      "|    n_updates            | 5310         |\n",
      "|    policy_gradient_loss | -0.00128     |\n",
      "|    value_loss           | 324          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.1e+03       |\n",
      "|    ep_rew_mean          | 1.91e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 533           |\n",
      "|    time_elapsed         | 18909         |\n",
      "|    total_timesteps      | 1091584       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00037220967 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.376        |\n",
      "|    explained_variance   | 0.856         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 146           |\n",
      "|    n_updates            | 5320          |\n",
      "|    policy_gradient_loss | -0.000259     |\n",
      "|    value_loss           | 419           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 534          |\n",
      "|    time_elapsed         | 18944        |\n",
      "|    total_timesteps      | 1093632      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010988382 |\n",
      "|    clip_fraction        | 0.00894      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.328       |\n",
      "|    explained_variance   | 0.804        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 187          |\n",
      "|    n_updates            | 5330         |\n",
      "|    policy_gradient_loss | -0.000755    |\n",
      "|    value_loss           | 467          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 535           |\n",
      "|    time_elapsed         | 18979         |\n",
      "|    total_timesteps      | 1095680       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00066421484 |\n",
      "|    clip_fraction        | 0.00288       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.346        |\n",
      "|    explained_variance   | 0.829         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 133           |\n",
      "|    n_updates            | 5340          |\n",
      "|    policy_gradient_loss | -8.61e-05     |\n",
      "|    value_loss           | 448           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 536          |\n",
      "|    time_elapsed         | 19014        |\n",
      "|    total_timesteps      | 1097728      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008392509 |\n",
      "|    clip_fraction        | 0.00313      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.343       |\n",
      "|    explained_variance   | 0.779        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 129          |\n",
      "|    n_updates            | 5350         |\n",
      "|    policy_gradient_loss | -0.000421    |\n",
      "|    value_loss           | 497          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 537           |\n",
      "|    time_elapsed         | 19050         |\n",
      "|    total_timesteps      | 1099776       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00026683876 |\n",
      "|    clip_fraction        | 4.88e-05      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.393        |\n",
      "|    explained_variance   | 0.822         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 200           |\n",
      "|    n_updates            | 5360          |\n",
      "|    policy_gradient_loss | -0.000511     |\n",
      "|    value_loss           | 482           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 538          |\n",
      "|    time_elapsed         | 19087        |\n",
      "|    total_timesteps      | 1101824      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0003254526 |\n",
      "|    clip_fraction        | 0.000635     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.402       |\n",
      "|    explained_variance   | 0.809        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 219          |\n",
      "|    n_updates            | 5370         |\n",
      "|    policy_gradient_loss | -0.000604    |\n",
      "|    value_loss           | 485          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.11e+03    |\n",
      "|    ep_rew_mean          | 1.9e+03     |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 539         |\n",
      "|    time_elapsed         | 19121       |\n",
      "|    total_timesteps      | 1103872     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001402304 |\n",
      "|    clip_fraction        | 0.00889     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.443      |\n",
      "|    explained_variance   | 0.725       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 334         |\n",
      "|    n_updates            | 5380        |\n",
      "|    policy_gradient_loss | -0.000693   |\n",
      "|    value_loss           | 604         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 540          |\n",
      "|    time_elapsed         | 19156        |\n",
      "|    total_timesteps      | 1105920      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014034924 |\n",
      "|    clip_fraction        | 0.00483      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.45        |\n",
      "|    explained_variance   | 0.876        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 126          |\n",
      "|    n_updates            | 5390         |\n",
      "|    policy_gradient_loss | -0.00118     |\n",
      "|    value_loss           | 327          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 541          |\n",
      "|    time_elapsed         | 19191        |\n",
      "|    total_timesteps      | 1107968      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0009217169 |\n",
      "|    clip_fraction        | 0.00903      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.447       |\n",
      "|    explained_variance   | 0.811        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 199          |\n",
      "|    n_updates            | 5400         |\n",
      "|    policy_gradient_loss | -0.00263     |\n",
      "|    value_loss           | 428          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 542          |\n",
      "|    time_elapsed         | 19227        |\n",
      "|    total_timesteps      | 1110016      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005450436 |\n",
      "|    clip_fraction        | 0.0043       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.409       |\n",
      "|    explained_variance   | 0.858        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 116          |\n",
      "|    n_updates            | 5410         |\n",
      "|    policy_gradient_loss | -0.000777    |\n",
      "|    value_loss           | 390          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 543          |\n",
      "|    time_elapsed         | 19263        |\n",
      "|    total_timesteps      | 1112064      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0013899227 |\n",
      "|    clip_fraction        | 0.0166       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.393       |\n",
      "|    explained_variance   | 0.893        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 312          |\n",
      "|    n_updates            | 5420         |\n",
      "|    policy_gradient_loss | -0.00113     |\n",
      "|    value_loss           | 393          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 544          |\n",
      "|    time_elapsed         | 19298        |\n",
      "|    total_timesteps      | 1114112      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008987222 |\n",
      "|    clip_fraction        | 0.00937      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.385       |\n",
      "|    explained_variance   | 0.78         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 121          |\n",
      "|    n_updates            | 5430         |\n",
      "|    policy_gradient_loss | -0.00138     |\n",
      "|    value_loss           | 357          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 545          |\n",
      "|    time_elapsed         | 19333        |\n",
      "|    total_timesteps      | 1116160      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010752899 |\n",
      "|    clip_fraction        | 0.0128       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.408       |\n",
      "|    explained_variance   | 0.793        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 150          |\n",
      "|    n_updates            | 5440         |\n",
      "|    policy_gradient_loss | -0.00207     |\n",
      "|    value_loss           | 377          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 546          |\n",
      "|    time_elapsed         | 19368        |\n",
      "|    total_timesteps      | 1118208      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0004962726 |\n",
      "|    clip_fraction        | 0.000977     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.409       |\n",
      "|    explained_variance   | 0.75         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 133          |\n",
      "|    n_updates            | 5450         |\n",
      "|    policy_gradient_loss | -0.00054     |\n",
      "|    value_loss           | 376          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.11e+03    |\n",
      "|    ep_rew_mean          | 1.91e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 547         |\n",
      "|    time_elapsed         | 19404       |\n",
      "|    total_timesteps      | 1120256     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.000538441 |\n",
      "|    clip_fraction        | 0.00146     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.408      |\n",
      "|    explained_variance   | 0.841       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 632         |\n",
      "|    n_updates            | 5460        |\n",
      "|    policy_gradient_loss | -0.000515   |\n",
      "|    value_loss           | 430         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 548          |\n",
      "|    time_elapsed         | 19438        |\n",
      "|    total_timesteps      | 1122304      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012481817 |\n",
      "|    clip_fraction        | 0.0131       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.346       |\n",
      "|    explained_variance   | 0.875        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 134          |\n",
      "|    n_updates            | 5470         |\n",
      "|    policy_gradient_loss | -0.00169     |\n",
      "|    value_loss           | 322          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 549           |\n",
      "|    time_elapsed         | 19473         |\n",
      "|    total_timesteps      | 1124352       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00015814585 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.31         |\n",
      "|    explained_variance   | 0.81          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 116           |\n",
      "|    n_updates            | 5480          |\n",
      "|    policy_gradient_loss | -0.000184     |\n",
      "|    value_loss           | 390           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 550          |\n",
      "|    time_elapsed         | 19509        |\n",
      "|    total_timesteps      | 1126400      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007579974 |\n",
      "|    clip_fraction        | 0.000244     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.346       |\n",
      "|    explained_variance   | 0.765        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 336          |\n",
      "|    n_updates            | 5490         |\n",
      "|    policy_gradient_loss | -0.000539    |\n",
      "|    value_loss           | 510          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 551          |\n",
      "|    time_elapsed         | 19544        |\n",
      "|    total_timesteps      | 1128448      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012963739 |\n",
      "|    clip_fraction        | 0.00488      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.334       |\n",
      "|    explained_variance   | 0.803        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 79.5         |\n",
      "|    n_updates            | 5500         |\n",
      "|    policy_gradient_loss | -0.00049     |\n",
      "|    value_loss           | 362          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 552          |\n",
      "|    time_elapsed         | 19579        |\n",
      "|    total_timesteps      | 1130496      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011149985 |\n",
      "|    clip_fraction        | 0.00146      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.307       |\n",
      "|    explained_variance   | 0.717        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 530          |\n",
      "|    n_updates            | 5510         |\n",
      "|    policy_gradient_loss | -0.000703    |\n",
      "|    value_loss           | 578          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.12e+03      |\n",
      "|    ep_rew_mean          | 1.91e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 553           |\n",
      "|    time_elapsed         | 19615         |\n",
      "|    total_timesteps      | 1132544       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00036263265 |\n",
      "|    clip_fraction        | 0.00166       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.319        |\n",
      "|    explained_variance   | 0.769         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 192           |\n",
      "|    n_updates            | 5520          |\n",
      "|    policy_gradient_loss | -0.000591     |\n",
      "|    value_loss           | 491           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 554          |\n",
      "|    time_elapsed         | 19650        |\n",
      "|    total_timesteps      | 1134592      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006323299 |\n",
      "|    clip_fraction        | 0.00205      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.31        |\n",
      "|    explained_variance   | 0.865        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 229          |\n",
      "|    n_updates            | 5530         |\n",
      "|    policy_gradient_loss | -0.000685    |\n",
      "|    value_loss           | 308          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.93e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 555          |\n",
      "|    time_elapsed         | 19686        |\n",
      "|    total_timesteps      | 1136640      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005240195 |\n",
      "|    clip_fraction        | 0.00562      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.317       |\n",
      "|    explained_variance   | 0.732        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 196          |\n",
      "|    n_updates            | 5540         |\n",
      "|    policy_gradient_loss | -0.000499    |\n",
      "|    value_loss           | 523          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 556          |\n",
      "|    time_elapsed         | 19721        |\n",
      "|    total_timesteps      | 1138688      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0015827097 |\n",
      "|    clip_fraction        | 0.00972      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.276       |\n",
      "|    explained_variance   | 0.726        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 335          |\n",
      "|    n_updates            | 5550         |\n",
      "|    policy_gradient_loss | -0.000957    |\n",
      "|    value_loss           | 512          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.91e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 557           |\n",
      "|    time_elapsed         | 19757         |\n",
      "|    total_timesteps      | 1140736       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00056384405 |\n",
      "|    clip_fraction        | 0.00894       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.313        |\n",
      "|    explained_variance   | 0.662         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 200           |\n",
      "|    n_updates            | 5560          |\n",
      "|    policy_gradient_loss | -4.2e-05      |\n",
      "|    value_loss           | 749           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.91e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 558           |\n",
      "|    time_elapsed         | 19792         |\n",
      "|    total_timesteps      | 1142784       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00019881217 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.361        |\n",
      "|    explained_variance   | 0.753         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 143           |\n",
      "|    n_updates            | 5570          |\n",
      "|    policy_gradient_loss | -3.18e-05     |\n",
      "|    value_loss           | 516           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.94e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 559          |\n",
      "|    time_elapsed         | 19827        |\n",
      "|    total_timesteps      | 1144832      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006981031 |\n",
      "|    clip_fraction        | 0.00381      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.315       |\n",
      "|    explained_variance   | 0.814        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 99.5         |\n",
      "|    n_updates            | 5580         |\n",
      "|    policy_gradient_loss | -0.000758    |\n",
      "|    value_loss           | 308          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.13e+03      |\n",
      "|    ep_rew_mean          | 1.95e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 560           |\n",
      "|    time_elapsed         | 19862         |\n",
      "|    total_timesteps      | 1146880       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00023054401 |\n",
      "|    clip_fraction        | 0.000293      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.347        |\n",
      "|    explained_variance   | 0.685         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 256           |\n",
      "|    n_updates            | 5590          |\n",
      "|    policy_gradient_loss | -9.54e-05     |\n",
      "|    value_loss           | 674           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.14e+03      |\n",
      "|    ep_rew_mean          | 1.95e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 561           |\n",
      "|    time_elapsed         | 19897         |\n",
      "|    total_timesteps      | 1148928       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00065534766 |\n",
      "|    clip_fraction        | 0.00205       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.327        |\n",
      "|    explained_variance   | 0.857         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 96.6          |\n",
      "|    n_updates            | 5600          |\n",
      "|    policy_gradient_loss | -0.000241     |\n",
      "|    value_loss           | 297           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.14e+03     |\n",
      "|    ep_rew_mean          | 1.96e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 562          |\n",
      "|    time_elapsed         | 19933        |\n",
      "|    total_timesteps      | 1150976      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005459343 |\n",
      "|    clip_fraction        | 0.00405      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.312       |\n",
      "|    explained_variance   | 0.811        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 107          |\n",
      "|    n_updates            | 5610         |\n",
      "|    policy_gradient_loss | -0.000257    |\n",
      "|    value_loss           | 348          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.15e+03     |\n",
      "|    ep_rew_mean          | 1.98e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 563          |\n",
      "|    time_elapsed         | 19969        |\n",
      "|    total_timesteps      | 1153024      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010801337 |\n",
      "|    clip_fraction        | 0.00566      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.29        |\n",
      "|    explained_variance   | 0.87         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 130          |\n",
      "|    n_updates            | 5620         |\n",
      "|    policy_gradient_loss | -0.000973    |\n",
      "|    value_loss           | 318          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.14e+03      |\n",
      "|    ep_rew_mean          | 1.96e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 564           |\n",
      "|    time_elapsed         | 20004         |\n",
      "|    total_timesteps      | 1155072       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00055995636 |\n",
      "|    clip_fraction        | 0.000391      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.287        |\n",
      "|    explained_variance   | 0.756         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 553           |\n",
      "|    n_updates            | 5630          |\n",
      "|    policy_gradient_loss | -0.000413     |\n",
      "|    value_loss           | 495           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.14e+03      |\n",
      "|    ep_rew_mean          | 1.96e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 565           |\n",
      "|    time_elapsed         | 20039         |\n",
      "|    total_timesteps      | 1157120       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00049768924 |\n",
      "|    clip_fraction        | 0.00156       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.282        |\n",
      "|    explained_variance   | 0.742         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 106           |\n",
      "|    n_updates            | 5640          |\n",
      "|    policy_gradient_loss | -0.000698     |\n",
      "|    value_loss           | 487           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.15e+03     |\n",
      "|    ep_rew_mean          | 1.97e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 566          |\n",
      "|    time_elapsed         | 20074        |\n",
      "|    total_timesteps      | 1159168      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0009687396 |\n",
      "|    clip_fraction        | 0.0242       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.339       |\n",
      "|    explained_variance   | 0.692        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 132          |\n",
      "|    n_updates            | 5650         |\n",
      "|    policy_gradient_loss | -0.00187     |\n",
      "|    value_loss           | 563          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.15e+03     |\n",
      "|    ep_rew_mean          | 1.97e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 567          |\n",
      "|    time_elapsed         | 20110        |\n",
      "|    total_timesteps      | 1161216      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0015170322 |\n",
      "|    clip_fraction        | 0.00532      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.321       |\n",
      "|    explained_variance   | 0.815        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 201          |\n",
      "|    n_updates            | 5660         |\n",
      "|    policy_gradient_loss | -0.000802    |\n",
      "|    value_loss           | 404          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.15e+03      |\n",
      "|    ep_rew_mean          | 1.96e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 568           |\n",
      "|    time_elapsed         | 20145         |\n",
      "|    total_timesteps      | 1163264       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00037500542 |\n",
      "|    clip_fraction        | 0.000293      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.308        |\n",
      "|    explained_variance   | 0.856         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 236           |\n",
      "|    n_updates            | 5670          |\n",
      "|    policy_gradient_loss | -0.000372     |\n",
      "|    value_loss           | 391           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.14e+03      |\n",
      "|    ep_rew_mean          | 1.95e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 569           |\n",
      "|    time_elapsed         | 20181         |\n",
      "|    total_timesteps      | 1165312       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00029172437 |\n",
      "|    clip_fraction        | 0.00596       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.296        |\n",
      "|    explained_variance   | 0.719         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 511           |\n",
      "|    n_updates            | 5680          |\n",
      "|    policy_gradient_loss | -0.000682     |\n",
      "|    value_loss           | 605           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.93e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 570          |\n",
      "|    time_elapsed         | 20217        |\n",
      "|    total_timesteps      | 1167360      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0013331678 |\n",
      "|    clip_fraction        | 0.00786      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.27        |\n",
      "|    explained_variance   | 0.758        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 664          |\n",
      "|    n_updates            | 5690         |\n",
      "|    policy_gradient_loss | -0.00119     |\n",
      "|    value_loss           | 532          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.12e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 571           |\n",
      "|    time_elapsed         | 20252         |\n",
      "|    total_timesteps      | 1169408       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00082621514 |\n",
      "|    clip_fraction        | 0.0104        |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.269        |\n",
      "|    explained_variance   | 0.778         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 103           |\n",
      "|    n_updates            | 5700          |\n",
      "|    policy_gradient_loss | -0.00104      |\n",
      "|    value_loss           | 403           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 572          |\n",
      "|    time_elapsed         | 20287        |\n",
      "|    total_timesteps      | 1171456      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012609976 |\n",
      "|    clip_fraction        | 0.015        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.294       |\n",
      "|    explained_variance   | 0.79         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 125          |\n",
      "|    n_updates            | 5710         |\n",
      "|    policy_gradient_loss | -0.00103     |\n",
      "|    value_loss           | 479          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 573          |\n",
      "|    time_elapsed         | 20323        |\n",
      "|    total_timesteps      | 1173504      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010759861 |\n",
      "|    clip_fraction        | 0.00317      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.326       |\n",
      "|    explained_variance   | 0.775        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 204          |\n",
      "|    n_updates            | 5720         |\n",
      "|    policy_gradient_loss | -0.000569    |\n",
      "|    value_loss           | 498          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.94e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 574          |\n",
      "|    time_elapsed         | 20358        |\n",
      "|    total_timesteps      | 1175552      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012028941 |\n",
      "|    clip_fraction        | 0.0118       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.359       |\n",
      "|    explained_variance   | 0.719        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 159          |\n",
      "|    n_updates            | 5730         |\n",
      "|    policy_gradient_loss | -0.00142     |\n",
      "|    value_loss           | 486          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.13e+03      |\n",
      "|    ep_rew_mean          | 1.95e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 575           |\n",
      "|    time_elapsed         | 20394         |\n",
      "|    total_timesteps      | 1177600       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00028964053 |\n",
      "|    clip_fraction        | 0.000732      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.38         |\n",
      "|    explained_variance   | 0.72          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 310           |\n",
      "|    n_updates            | 5740          |\n",
      "|    policy_gradient_loss | -0.000133     |\n",
      "|    value_loss           | 521           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.14e+03      |\n",
      "|    ep_rew_mean          | 1.97e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 576           |\n",
      "|    time_elapsed         | 20429         |\n",
      "|    total_timesteps      | 1179648       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00074483955 |\n",
      "|    clip_fraction        | 0.00278       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.366        |\n",
      "|    explained_variance   | 0.684         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 459           |\n",
      "|    n_updates            | 5750          |\n",
      "|    policy_gradient_loss | -0.000477     |\n",
      "|    value_loss           | 498           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.97e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 577          |\n",
      "|    time_elapsed         | 20465        |\n",
      "|    total_timesteps      | 1181696      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012675656 |\n",
      "|    clip_fraction        | 0.00508      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.365       |\n",
      "|    explained_variance   | 0.651        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 200          |\n",
      "|    n_updates            | 5760         |\n",
      "|    policy_gradient_loss | -0.000588    |\n",
      "|    value_loss           | 524          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.15e+03      |\n",
      "|    ep_rew_mean          | 1.98e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 578           |\n",
      "|    time_elapsed         | 20500         |\n",
      "|    total_timesteps      | 1183744       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00077982317 |\n",
      "|    clip_fraction        | 0.00142       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.393        |\n",
      "|    explained_variance   | 0.875         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 127           |\n",
      "|    n_updates            | 5770          |\n",
      "|    policy_gradient_loss | -6.47e-05     |\n",
      "|    value_loss           | 281           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 2.01e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 579          |\n",
      "|    time_elapsed         | 20538        |\n",
      "|    total_timesteps      | 1185792      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0019634594 |\n",
      "|    clip_fraction        | 0.0195       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.343       |\n",
      "|    explained_variance   | 0.816        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 96           |\n",
      "|    n_updates            | 5780         |\n",
      "|    policy_gradient_loss | -0.00147     |\n",
      "|    value_loss           | 436          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.16e+03    |\n",
      "|    ep_rew_mean          | 2.01e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 580         |\n",
      "|    time_elapsed         | 20572       |\n",
      "|    total_timesteps      | 1187840     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.005405776 |\n",
      "|    clip_fraction        | 0.0662      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.604      |\n",
      "|    explained_variance   | 0.797       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 186         |\n",
      "|    n_updates            | 5790        |\n",
      "|    policy_gradient_loss | 0.00153     |\n",
      "|    value_loss           | 685         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 2e+03        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 581          |\n",
      "|    time_elapsed         | 20607        |\n",
      "|    total_timesteps      | 1189888      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005713599 |\n",
      "|    clip_fraction        | 0.000879     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.382       |\n",
      "|    explained_variance   | 0.762        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 152          |\n",
      "|    n_updates            | 5800         |\n",
      "|    policy_gradient_loss | -0.00082     |\n",
      "|    value_loss           | 490          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 2e+03        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 582          |\n",
      "|    time_elapsed         | 20643        |\n",
      "|    total_timesteps      | 1191936      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007825731 |\n",
      "|    clip_fraction        | 0.000732     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.366       |\n",
      "|    explained_variance   | 0.74         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 428          |\n",
      "|    n_updates            | 5810         |\n",
      "|    policy_gradient_loss | -0.000609    |\n",
      "|    value_loss           | 542          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.15e+03    |\n",
      "|    ep_rew_mean          | 1.99e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 583         |\n",
      "|    time_elapsed         | 20678       |\n",
      "|    total_timesteps      | 1193984     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.000299102 |\n",
      "|    clip_fraction        | 0           |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.328      |\n",
      "|    explained_variance   | 0.758       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 88.8        |\n",
      "|    n_updates            | 5820        |\n",
      "|    policy_gradient_loss | -0.000382   |\n",
      "|    value_loss           | 438         |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.16e+03      |\n",
      "|    ep_rew_mean          | 2e+03         |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 584           |\n",
      "|    time_elapsed         | 20713         |\n",
      "|    total_timesteps      | 1196032       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00037567556 |\n",
      "|    clip_fraction        | 0.0019        |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.365        |\n",
      "|    explained_variance   | 0.755         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 196           |\n",
      "|    n_updates            | 5830          |\n",
      "|    policy_gradient_loss | -0.000535     |\n",
      "|    value_loss           | 521           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 2.01e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 585          |\n",
      "|    time_elapsed         | 20748        |\n",
      "|    total_timesteps      | 1198080      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0013558574 |\n",
      "|    clip_fraction        | 0.0103       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.303       |\n",
      "|    explained_variance   | 0.701        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 174          |\n",
      "|    n_updates            | 5840         |\n",
      "|    policy_gradient_loss | -0.00134     |\n",
      "|    value_loss           | 435          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.14e+03     |\n",
      "|    ep_rew_mean          | 1.99e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 586          |\n",
      "|    time_elapsed         | 20784        |\n",
      "|    total_timesteps      | 1200128      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010703871 |\n",
      "|    clip_fraction        | 0.0189       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.357       |\n",
      "|    explained_variance   | 0.54         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 169          |\n",
      "|    n_updates            | 5850         |\n",
      "|    policy_gradient_loss | -0.000166    |\n",
      "|    value_loss           | 697          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.12e+03      |\n",
      "|    ep_rew_mean          | 1.97e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 587           |\n",
      "|    time_elapsed         | 20820         |\n",
      "|    total_timesteps      | 1202176       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00078141945 |\n",
      "|    clip_fraction        | 0.00215       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.303        |\n",
      "|    explained_variance   | 0.692         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 160           |\n",
      "|    n_updates            | 5860          |\n",
      "|    policy_gradient_loss | -0.000419     |\n",
      "|    value_loss           | 564           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.95e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 588           |\n",
      "|    time_elapsed         | 20855         |\n",
      "|    total_timesteps      | 1204224       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00049100735 |\n",
      "|    clip_fraction        | 0.00264       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.322        |\n",
      "|    explained_variance   | 0.65          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 124           |\n",
      "|    n_updates            | 5870          |\n",
      "|    policy_gradient_loss | -0.000292     |\n",
      "|    value_loss           | 557           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.94e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 589          |\n",
      "|    time_elapsed         | 20891        |\n",
      "|    total_timesteps      | 1206272      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011543827 |\n",
      "|    clip_fraction        | 0.0124       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.352       |\n",
      "|    explained_variance   | 0.749        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 332          |\n",
      "|    n_updates            | 5880         |\n",
      "|    policy_gradient_loss | -0.00122     |\n",
      "|    value_loss           | 507          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.95e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 590          |\n",
      "|    time_elapsed         | 20926        |\n",
      "|    total_timesteps      | 1208320      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 8.206116e-05 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.335       |\n",
      "|    explained_variance   | 0.812        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 138          |\n",
      "|    n_updates            | 5890         |\n",
      "|    policy_gradient_loss | 1.7e-05      |\n",
      "|    value_loss           | 341          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.95e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 591          |\n",
      "|    time_elapsed         | 20962        |\n",
      "|    total_timesteps      | 1210368      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011822477 |\n",
      "|    clip_fraction        | 0.0114       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.396       |\n",
      "|    explained_variance   | 0.767        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 133          |\n",
      "|    n_updates            | 5900         |\n",
      "|    policy_gradient_loss | -0.00149     |\n",
      "|    value_loss           | 530          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.95e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 592          |\n",
      "|    time_elapsed         | 20997        |\n",
      "|    total_timesteps      | 1212416      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0016378917 |\n",
      "|    clip_fraction        | 0.012        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.355       |\n",
      "|    explained_variance   | 0.858        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 99.5         |\n",
      "|    n_updates            | 5910         |\n",
      "|    policy_gradient_loss | -0.00118     |\n",
      "|    value_loss           | 318          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.97e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 593          |\n",
      "|    time_elapsed         | 21033        |\n",
      "|    total_timesteps      | 1214464      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010162892 |\n",
      "|    clip_fraction        | 0.00923      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.328       |\n",
      "|    explained_variance   | 0.763        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 262          |\n",
      "|    n_updates            | 5920         |\n",
      "|    policy_gradient_loss | -0.0014      |\n",
      "|    value_loss           | 573          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.96e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 594          |\n",
      "|    time_elapsed         | 21069        |\n",
      "|    total_timesteps      | 1216512      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012614133 |\n",
      "|    clip_fraction        | 0.00415      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.276       |\n",
      "|    explained_variance   | 0.805        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 161          |\n",
      "|    n_updates            | 5930         |\n",
      "|    policy_gradient_loss | 0.000253     |\n",
      "|    value_loss           | 324          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.96e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 595          |\n",
      "|    time_elapsed         | 21105        |\n",
      "|    total_timesteps      | 1218560      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007609391 |\n",
      "|    clip_fraction        | 0.00513      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.289       |\n",
      "|    explained_variance   | 0.729        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 142          |\n",
      "|    n_updates            | 5940         |\n",
      "|    policy_gradient_loss | -0.000606    |\n",
      "|    value_loss           | 511          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.96e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 596          |\n",
      "|    time_elapsed         | 21141        |\n",
      "|    total_timesteps      | 1220608      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005958519 |\n",
      "|    clip_fraction        | 0.00127      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.313       |\n",
      "|    explained_variance   | 0.79         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 220          |\n",
      "|    n_updates            | 5950         |\n",
      "|    policy_gradient_loss | -0.000709    |\n",
      "|    value_loss           | 389          |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 1.12e+03   |\n",
      "|    ep_rew_mean          | 1.97e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 57         |\n",
      "|    iterations           | 597        |\n",
      "|    time_elapsed         | 21175      |\n",
      "|    total_timesteps      | 1222656    |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.00120574 |\n",
      "|    clip_fraction        | 0.00142    |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -0.362     |\n",
      "|    explained_variance   | 0.798      |\n",
      "|    learning_rate        | 1e-06      |\n",
      "|    loss                 | 155        |\n",
      "|    n_updates            | 5960       |\n",
      "|    policy_gradient_loss | -0.000524  |\n",
      "|    value_loss           | 437        |\n",
      "----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.95e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 598           |\n",
      "|    time_elapsed         | 21211         |\n",
      "|    total_timesteps      | 1224704       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00013909512 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.305        |\n",
      "|    explained_variance   | 0.783         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 646           |\n",
      "|    n_updates            | 5970          |\n",
      "|    policy_gradient_loss | -0.000169     |\n",
      "|    value_loss           | 519           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.95e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 599           |\n",
      "|    time_elapsed         | 21248         |\n",
      "|    total_timesteps      | 1226752       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00015293594 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.278        |\n",
      "|    explained_variance   | 0.857         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 142           |\n",
      "|    n_updates            | 5980          |\n",
      "|    policy_gradient_loss | -0.000185     |\n",
      "|    value_loss           | 357           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.95e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 600           |\n",
      "|    time_elapsed         | 21284         |\n",
      "|    total_timesteps      | 1228800       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00038518282 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.337        |\n",
      "|    explained_variance   | 0.878         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 194           |\n",
      "|    n_updates            | 5990          |\n",
      "|    policy_gradient_loss | -0.000103     |\n",
      "|    value_loss           | 336           |\n",
      "-------------------------------------------\n",
      "--------------------------------------------\n",
      "| rollout/                |                |\n",
      "|    ep_len_mean          | 1.11e+03       |\n",
      "|    ep_rew_mean          | 1.95e+03       |\n",
      "| time/                   |                |\n",
      "|    fps                  | 57             |\n",
      "|    iterations           | 601            |\n",
      "|    time_elapsed         | 21323          |\n",
      "|    total_timesteps      | 1230848        |\n",
      "| train/                  |                |\n",
      "|    approx_kl            | 0.000113517715 |\n",
      "|    clip_fraction        | 9.77e-05       |\n",
      "|    clip_range           | 0.2            |\n",
      "|    entropy_loss         | -0.393         |\n",
      "|    explained_variance   | 0.824          |\n",
      "|    learning_rate        | 1e-06          |\n",
      "|    loss                 | 188            |\n",
      "|    n_updates            | 6000           |\n",
      "|    policy_gradient_loss | 7.35e-05       |\n",
      "|    value_loss           | 597            |\n",
      "--------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.95e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 602           |\n",
      "|    time_elapsed         | 21359         |\n",
      "|    total_timesteps      | 1232896       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00083136867 |\n",
      "|    clip_fraction        | 0.00103       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.76         |\n",
      "|    explained_variance   | 0.722         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 13.3          |\n",
      "|    n_updates            | 6010          |\n",
      "|    policy_gradient_loss | -0.000223     |\n",
      "|    value_loss           | 203           |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.17e+03    |\n",
      "|    ep_rew_mean          | 1.98e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 603         |\n",
      "|    time_elapsed         | 21394       |\n",
      "|    total_timesteps      | 1234944     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.013948524 |\n",
      "|    clip_fraction        | 0.0968      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.77       |\n",
      "|    explained_variance   | 0.695       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 6.6         |\n",
      "|    n_updates            | 6020        |\n",
      "|    policy_gradient_loss | -0.00589    |\n",
      "|    value_loss           | 19.8        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.17e+03    |\n",
      "|    ep_rew_mean          | 1.98e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 604         |\n",
      "|    time_elapsed         | 21429       |\n",
      "|    total_timesteps      | 1236992     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004191124 |\n",
      "|    clip_fraction        | 0.0402      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.38       |\n",
      "|    explained_variance   | 0.912       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 78.4        |\n",
      "|    n_updates            | 6030        |\n",
      "|    policy_gradient_loss | 0.00116     |\n",
      "|    value_loss           | 227         |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.17e+03      |\n",
      "|    ep_rew_mean          | 1.99e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 605           |\n",
      "|    time_elapsed         | 21465         |\n",
      "|    total_timesteps      | 1239040       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00062761846 |\n",
      "|    clip_fraction        | 0.00713       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.298        |\n",
      "|    explained_variance   | 0.683         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 182           |\n",
      "|    n_updates            | 6040          |\n",
      "|    policy_gradient_loss | -0.00057      |\n",
      "|    value_loss           | 353           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.17e+03      |\n",
      "|    ep_rew_mean          | 1.98e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 606           |\n",
      "|    time_elapsed         | 21501         |\n",
      "|    total_timesteps      | 1241088       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00081931194 |\n",
      "|    clip_fraction        | 0.00479       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.317        |\n",
      "|    explained_variance   | 0.573         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 137           |\n",
      "|    n_updates            | 6050          |\n",
      "|    policy_gradient_loss | -0.000886     |\n",
      "|    value_loss           | 614           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.16e+03      |\n",
      "|    ep_rew_mean          | 1.97e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 607           |\n",
      "|    time_elapsed         | 21535         |\n",
      "|    total_timesteps      | 1243136       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00020932464 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.347        |\n",
      "|    explained_variance   | 0.571         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 163           |\n",
      "|    n_updates            | 6060          |\n",
      "|    policy_gradient_loss | -0.00018      |\n",
      "|    value_loss           | 621           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.15e+03      |\n",
      "|    ep_rew_mean          | 1.97e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 608           |\n",
      "|    time_elapsed         | 21571         |\n",
      "|    total_timesteps      | 1245184       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00025940317 |\n",
      "|    clip_fraction        | 0.000684      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.333        |\n",
      "|    explained_variance   | 0.569         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 143           |\n",
      "|    n_updates            | 6070          |\n",
      "|    policy_gradient_loss | -0.000498     |\n",
      "|    value_loss           | 517           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 1.98e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 609          |\n",
      "|    time_elapsed         | 21606        |\n",
      "|    total_timesteps      | 1247232      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012535816 |\n",
      "|    clip_fraction        | 0.0226       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.404       |\n",
      "|    explained_variance   | 0.696        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 181          |\n",
      "|    n_updates            | 6080         |\n",
      "|    policy_gradient_loss | -0.00164     |\n",
      "|    value_loss           | 481          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 1.98e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 610          |\n",
      "|    time_elapsed         | 21641        |\n",
      "|    total_timesteps      | 1249280      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012247338 |\n",
      "|    clip_fraction        | 9.77e-05     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.392       |\n",
      "|    explained_variance   | 0.663        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 420          |\n",
      "|    n_updates            | 6090         |\n",
      "|    policy_gradient_loss | -0.000871    |\n",
      "|    value_loss           | 685          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.17e+03     |\n",
      "|    ep_rew_mean          | 2e+03        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 611          |\n",
      "|    time_elapsed         | 21677        |\n",
      "|    total_timesteps      | 1251328      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012357372 |\n",
      "|    clip_fraction        | 0.0186       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.404       |\n",
      "|    explained_variance   | 0.749        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 114          |\n",
      "|    n_updates            | 6100         |\n",
      "|    policy_gradient_loss | -0.00152     |\n",
      "|    value_loss           | 408          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.17e+03     |\n",
      "|    ep_rew_mean          | 1.99e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 612          |\n",
      "|    time_elapsed         | 21713        |\n",
      "|    total_timesteps      | 1253376      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0017090242 |\n",
      "|    clip_fraction        | 0.0119       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.386       |\n",
      "|    explained_variance   | 0.741        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 176          |\n",
      "|    n_updates            | 6110         |\n",
      "|    policy_gradient_loss | -0.00125     |\n",
      "|    value_loss           | 519          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.17e+03     |\n",
      "|    ep_rew_mean          | 2e+03        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 613          |\n",
      "|    time_elapsed         | 21748        |\n",
      "|    total_timesteps      | 1255424      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014856725 |\n",
      "|    clip_fraction        | 0.00972      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.314       |\n",
      "|    explained_variance   | 0.669        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 274          |\n",
      "|    n_updates            | 6120         |\n",
      "|    policy_gradient_loss | -0.000802    |\n",
      "|    value_loss           | 484          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.17e+03     |\n",
      "|    ep_rew_mean          | 2.01e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 614          |\n",
      "|    time_elapsed         | 21784        |\n",
      "|    total_timesteps      | 1257472      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006638013 |\n",
      "|    clip_fraction        | 0.000586     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.322       |\n",
      "|    explained_variance   | 0.69         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 119          |\n",
      "|    n_updates            | 6130         |\n",
      "|    policy_gradient_loss | -0.00059     |\n",
      "|    value_loss           | 481          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.18e+03      |\n",
      "|    ep_rew_mean          | 2.02e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 615           |\n",
      "|    time_elapsed         | 21819         |\n",
      "|    total_timesteps      | 1259520       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00071705587 |\n",
      "|    clip_fraction        | 0.00127       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.35         |\n",
      "|    explained_variance   | 0.714         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 126           |\n",
      "|    n_updates            | 6140          |\n",
      "|    policy_gradient_loss | 7.53e-05      |\n",
      "|    value_loss           | 424           |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.17e+03    |\n",
      "|    ep_rew_mean          | 2.01e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 616         |\n",
      "|    time_elapsed         | 21856       |\n",
      "|    total_timesteps      | 1261568     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.000866827 |\n",
      "|    clip_fraction        | 0.00947     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.265      |\n",
      "|    explained_variance   | 0.703       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 330         |\n",
      "|    n_updates            | 6150        |\n",
      "|    policy_gradient_loss | -0.000235   |\n",
      "|    value_loss           | 460         |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.15e+03      |\n",
      "|    ep_rew_mean          | 1.97e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 617           |\n",
      "|    time_elapsed         | 21892         |\n",
      "|    total_timesteps      | 1263616       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00078740664 |\n",
      "|    clip_fraction        | 0.0163        |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.303        |\n",
      "|    explained_variance   | 0.573         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 548           |\n",
      "|    n_updates            | 6160          |\n",
      "|    policy_gradient_loss | -0.00134      |\n",
      "|    value_loss           | 667           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.15e+03     |\n",
      "|    ep_rew_mean          | 1.98e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 618          |\n",
      "|    time_elapsed         | 21928        |\n",
      "|    total_timesteps      | 1265664      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010136154 |\n",
      "|    clip_fraction        | 0.00679      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.337       |\n",
      "|    explained_variance   | 0.739        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 111          |\n",
      "|    n_updates            | 6170         |\n",
      "|    policy_gradient_loss | -0.000935    |\n",
      "|    value_loss           | 504          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.15e+03     |\n",
      "|    ep_rew_mean          | 1.98e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 619          |\n",
      "|    time_elapsed         | 21964        |\n",
      "|    total_timesteps      | 1267712      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006789309 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.364       |\n",
      "|    explained_variance   | 0.704        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 122          |\n",
      "|    n_updates            | 6180         |\n",
      "|    policy_gradient_loss | -0.000653    |\n",
      "|    value_loss           | 548          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.15e+03     |\n",
      "|    ep_rew_mean          | 1.97e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 620          |\n",
      "|    time_elapsed         | 22000        |\n",
      "|    total_timesteps      | 1269760      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008034092 |\n",
      "|    clip_fraction        | 0.00601      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.291       |\n",
      "|    explained_variance   | 0.646        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 297          |\n",
      "|    n_updates            | 6190         |\n",
      "|    policy_gradient_loss | -0.00113     |\n",
      "|    value_loss           | 446          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.16e+03      |\n",
      "|    ep_rew_mean          | 1.99e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 621           |\n",
      "|    time_elapsed         | 22035         |\n",
      "|    total_timesteps      | 1271808       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00094520976 |\n",
      "|    clip_fraction        | 0.00864       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.314        |\n",
      "|    explained_variance   | 0.706         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 547           |\n",
      "|    n_updates            | 6200          |\n",
      "|    policy_gradient_loss | -0.000821     |\n",
      "|    value_loss           | 617           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 1.98e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 622          |\n",
      "|    time_elapsed         | 22070        |\n",
      "|    total_timesteps      | 1273856      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010586894 |\n",
      "|    clip_fraction        | 0.00669      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.284       |\n",
      "|    explained_variance   | 0.769        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 123          |\n",
      "|    n_updates            | 6210         |\n",
      "|    policy_gradient_loss | -0.000945    |\n",
      "|    value_loss           | 304          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.15e+03      |\n",
      "|    ep_rew_mean          | 1.97e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 623           |\n",
      "|    time_elapsed         | 22105         |\n",
      "|    total_timesteps      | 1275904       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00027481947 |\n",
      "|    clip_fraction        | 0.00151       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.27         |\n",
      "|    explained_variance   | 0.702         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 460           |\n",
      "|    n_updates            | 6220          |\n",
      "|    policy_gradient_loss | -0.000533     |\n",
      "|    value_loss           | 567           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.15e+03      |\n",
      "|    ep_rew_mean          | 1.97e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 624           |\n",
      "|    time_elapsed         | 22141         |\n",
      "|    total_timesteps      | 1277952       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00019281599 |\n",
      "|    clip_fraction        | 0.000439      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.267        |\n",
      "|    explained_variance   | 0.622         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 276           |\n",
      "|    n_updates            | 6230          |\n",
      "|    policy_gradient_loss | -0.000378     |\n",
      "|    value_loss           | 537           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.14e+03     |\n",
      "|    ep_rew_mean          | 1.96e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 625          |\n",
      "|    time_elapsed         | 22178        |\n",
      "|    total_timesteps      | 1280000      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0017301141 |\n",
      "|    clip_fraction        | 0.011        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.278       |\n",
      "|    explained_variance   | 0.791        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 194          |\n",
      "|    n_updates            | 6240         |\n",
      "|    policy_gradient_loss | -0.00108     |\n",
      "|    value_loss           | 481          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.15e+03      |\n",
      "|    ep_rew_mean          | 1.97e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 626           |\n",
      "|    time_elapsed         | 22214         |\n",
      "|    total_timesteps      | 1282048       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00067593576 |\n",
      "|    clip_fraction        | 0.00747       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.263        |\n",
      "|    explained_variance   | 0.757         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 787           |\n",
      "|    n_updates            | 6250          |\n",
      "|    policy_gradient_loss | -0.00131      |\n",
      "|    value_loss           | 508           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.15e+03     |\n",
      "|    ep_rew_mean          | 1.98e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 627          |\n",
      "|    time_elapsed         | 22249        |\n",
      "|    total_timesteps      | 1284096      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005253198 |\n",
      "|    clip_fraction        | 0.00547      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.298       |\n",
      "|    explained_variance   | 0.796        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 194          |\n",
      "|    n_updates            | 6260         |\n",
      "|    policy_gradient_loss | -0.00063     |\n",
      "|    value_loss           | 453          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.15e+03     |\n",
      "|    ep_rew_mean          | 1.98e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 628          |\n",
      "|    time_elapsed         | 22284        |\n",
      "|    total_timesteps      | 1286144      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006235919 |\n",
      "|    clip_fraction        | 0.0132       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.343       |\n",
      "|    explained_variance   | 0.71         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 136          |\n",
      "|    n_updates            | 6270         |\n",
      "|    policy_gradient_loss | -0.00135     |\n",
      "|    value_loss           | 546          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 1.99e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 629          |\n",
      "|    time_elapsed         | 22320        |\n",
      "|    total_timesteps      | 1288192      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011111577 |\n",
      "|    clip_fraction        | 0.00269      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.301       |\n",
      "|    explained_variance   | 0.738        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 266          |\n",
      "|    n_updates            | 6280         |\n",
      "|    policy_gradient_loss | -0.000515    |\n",
      "|    value_loss           | 330          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.14e+03      |\n",
      "|    ep_rew_mean          | 1.95e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 630           |\n",
      "|    time_elapsed         | 22355         |\n",
      "|    total_timesteps      | 1290240       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00093373295 |\n",
      "|    clip_fraction        | 0.00405       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.263        |\n",
      "|    explained_variance   | 0.595         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 964           |\n",
      "|    n_updates            | 6290          |\n",
      "|    policy_gradient_loss | -0.000283     |\n",
      "|    value_loss           | 503           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.14e+03     |\n",
      "|    ep_rew_mean          | 1.96e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 631          |\n",
      "|    time_elapsed         | 22391        |\n",
      "|    total_timesteps      | 1292288      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005607215 |\n",
      "|    clip_fraction        | 0.00449      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.251       |\n",
      "|    explained_variance   | 0.756        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 160          |\n",
      "|    n_updates            | 6300         |\n",
      "|    policy_gradient_loss | -0.000751    |\n",
      "|    value_loss           | 434          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.91e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 632           |\n",
      "|    time_elapsed         | 22427         |\n",
      "|    total_timesteps      | 1294336       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00018749546 |\n",
      "|    clip_fraction        | 0.000391      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.263        |\n",
      "|    explained_variance   | 0.811         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 281           |\n",
      "|    n_updates            | 6310          |\n",
      "|    policy_gradient_loss | -0.000263     |\n",
      "|    value_loss           | 377           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.08e+03      |\n",
      "|    ep_rew_mean          | 1.86e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 633           |\n",
      "|    time_elapsed         | 22462         |\n",
      "|    total_timesteps      | 1296384       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00025716383 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.216        |\n",
      "|    explained_variance   | 0.653         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 284           |\n",
      "|    n_updates            | 6320          |\n",
      "|    policy_gradient_loss | -8.32e-05     |\n",
      "|    value_loss           | 609           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.09e+03     |\n",
      "|    ep_rew_mean          | 1.87e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 634          |\n",
      "|    time_elapsed         | 22498        |\n",
      "|    total_timesteps      | 1298432      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0004527016 |\n",
      "|    clip_fraction        | 0.00566      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.241       |\n",
      "|    explained_variance   | 0.69         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 422          |\n",
      "|    n_updates            | 6330         |\n",
      "|    policy_gradient_loss | -0.00019     |\n",
      "|    value_loss           | 547          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.09e+03     |\n",
      "|    ep_rew_mean          | 1.88e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 635          |\n",
      "|    time_elapsed         | 22534        |\n",
      "|    total_timesteps      | 1300480      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006758552 |\n",
      "|    clip_fraction        | 0.00195      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.239       |\n",
      "|    explained_variance   | 0.79         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 112          |\n",
      "|    n_updates            | 6340         |\n",
      "|    policy_gradient_loss | -5.15e-05    |\n",
      "|    value_loss           | 332          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.1e+03       |\n",
      "|    ep_rew_mean          | 1.9e+03       |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 636           |\n",
      "|    time_elapsed         | 22569         |\n",
      "|    total_timesteps      | 1302528       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00028074297 |\n",
      "|    clip_fraction        | 0.000391      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.24         |\n",
      "|    explained_variance   | 0.805         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 95.3          |\n",
      "|    n_updates            | 6350          |\n",
      "|    policy_gradient_loss | -0.000189     |\n",
      "|    value_loss           | 404           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.9e+03       |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 637           |\n",
      "|    time_elapsed         | 22604         |\n",
      "|    total_timesteps      | 1304576       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00068227074 |\n",
      "|    clip_fraction        | 0.011         |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.294        |\n",
      "|    explained_variance   | 0.747         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 113           |\n",
      "|    n_updates            | 6360          |\n",
      "|    policy_gradient_loss | -0.000802     |\n",
      "|    value_loss           | 444           |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.11e+03    |\n",
      "|    ep_rew_mean          | 1.91e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 638         |\n",
      "|    time_elapsed         | 22639       |\n",
      "|    total_timesteps      | 1306624     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.000673283 |\n",
      "|    clip_fraction        | 0.00957     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.298      |\n",
      "|    explained_variance   | 0.742       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 111         |\n",
      "|    n_updates            | 6370        |\n",
      "|    policy_gradient_loss | -0.000597   |\n",
      "|    value_loss           | 328         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.89e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 639          |\n",
      "|    time_elapsed         | 22675        |\n",
      "|    total_timesteps      | 1308672      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0009450306 |\n",
      "|    clip_fraction        | 0.0152       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.272       |\n",
      "|    explained_variance   | 0.753        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 161          |\n",
      "|    n_updates            | 6380         |\n",
      "|    policy_gradient_loss | -0.00153     |\n",
      "|    value_loss           | 457          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 640          |\n",
      "|    time_elapsed         | 22711        |\n",
      "|    total_timesteps      | 1310720      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005734556 |\n",
      "|    clip_fraction        | 0.0128       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.317       |\n",
      "|    explained_variance   | 0.628        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 193          |\n",
      "|    n_updates            | 6390         |\n",
      "|    policy_gradient_loss | -0.000179    |\n",
      "|    value_loss           | 575          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.12e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 641           |\n",
      "|    time_elapsed         | 22746         |\n",
      "|    total_timesteps      | 1312768       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00039856337 |\n",
      "|    clip_fraction        | 0.000537      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.348        |\n",
      "|    explained_variance   | 0.759         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 123           |\n",
      "|    n_updates            | 6400          |\n",
      "|    policy_gradient_loss | -0.000473     |\n",
      "|    value_loss           | 421           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.13e+03      |\n",
      "|    ep_rew_mean          | 1.95e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 642           |\n",
      "|    time_elapsed         | 22782         |\n",
      "|    total_timesteps      | 1314816       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00016278352 |\n",
      "|    clip_fraction        | 0.000293      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.326        |\n",
      "|    explained_variance   | 0.722         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 218           |\n",
      "|    n_updates            | 6410          |\n",
      "|    policy_gradient_loss | -0.000186     |\n",
      "|    value_loss           | 439           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.93e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 643          |\n",
      "|    time_elapsed         | 22818        |\n",
      "|    total_timesteps      | 1316864      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008822145 |\n",
      "|    clip_fraction        | 0.00566      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.374       |\n",
      "|    explained_variance   | 0.661        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 371          |\n",
      "|    n_updates            | 6420         |\n",
      "|    policy_gradient_loss | -0.000397    |\n",
      "|    value_loss           | 505          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.12e+03    |\n",
      "|    ep_rew_mean          | 1.92e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 644         |\n",
      "|    time_elapsed         | 22854       |\n",
      "|    total_timesteps      | 1318912     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.000340219 |\n",
      "|    clip_fraction        | 0.000342    |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.371      |\n",
      "|    explained_variance   | 0.703       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 204         |\n",
      "|    n_updates            | 6430        |\n",
      "|    policy_gradient_loss | -0.00051    |\n",
      "|    value_loss           | 521         |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 645           |\n",
      "|    time_elapsed         | 22891         |\n",
      "|    total_timesteps      | 1320960       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00071279914 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.348        |\n",
      "|    explained_variance   | 0.746         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 258           |\n",
      "|    n_updates            | 6440          |\n",
      "|    policy_gradient_loss | -0.000503     |\n",
      "|    value_loss           | 555           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 646          |\n",
      "|    time_elapsed         | 22924        |\n",
      "|    total_timesteps      | 1323008      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0009581538 |\n",
      "|    clip_fraction        | 0.00718      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.371       |\n",
      "|    explained_variance   | 0.781        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 143          |\n",
      "|    n_updates            | 6450         |\n",
      "|    policy_gradient_loss | -0.000367    |\n",
      "|    value_loss           | 412          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.12e+03      |\n",
      "|    ep_rew_mean          | 1.94e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 647           |\n",
      "|    time_elapsed         | 22960         |\n",
      "|    total_timesteps      | 1325056       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00031859815 |\n",
      "|    clip_fraction        | 0.00127       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.38         |\n",
      "|    explained_variance   | 0.707         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 141           |\n",
      "|    n_updates            | 6460          |\n",
      "|    policy_gradient_loss | -0.000235     |\n",
      "|    value_loss           | 415           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.93e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 648          |\n",
      "|    time_elapsed         | 22995        |\n",
      "|    total_timesteps      | 1327104      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005518637 |\n",
      "|    clip_fraction        | 0.00259      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.437       |\n",
      "|    explained_variance   | 0.706        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 149          |\n",
      "|    n_updates            | 6470         |\n",
      "|    policy_gradient_loss | -0.00128     |\n",
      "|    value_loss           | 426          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.12e+03    |\n",
      "|    ep_rew_mean          | 1.93e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 649         |\n",
      "|    time_elapsed         | 23030       |\n",
      "|    total_timesteps      | 1329152     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001174159 |\n",
      "|    clip_fraction        | 0.00659     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.431      |\n",
      "|    explained_variance   | 0.679       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 108         |\n",
      "|    n_updates            | 6480        |\n",
      "|    policy_gradient_loss | -0.000588   |\n",
      "|    value_loss           | 412         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.93e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 650          |\n",
      "|    time_elapsed         | 23065        |\n",
      "|    total_timesteps      | 1331200      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007917328 |\n",
      "|    clip_fraction        | 0.00488      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.484       |\n",
      "|    explained_variance   | 0.546        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 225          |\n",
      "|    n_updates            | 6490         |\n",
      "|    policy_gradient_loss | 0.000394     |\n",
      "|    value_loss           | 568          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.13e+03      |\n",
      "|    ep_rew_mean          | 1.93e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 651           |\n",
      "|    time_elapsed         | 23101         |\n",
      "|    total_timesteps      | 1333248       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00055418105 |\n",
      "|    clip_fraction        | 0.000146      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.473        |\n",
      "|    explained_variance   | 0.82          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 320           |\n",
      "|    n_updates            | 6500          |\n",
      "|    policy_gradient_loss | -0.00036      |\n",
      "|    value_loss           | 369           |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.12e+03    |\n",
      "|    ep_rew_mean          | 1.93e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 652         |\n",
      "|    time_elapsed         | 23136       |\n",
      "|    total_timesteps      | 1335296     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.003095093 |\n",
      "|    clip_fraction        | 0.0542      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.405      |\n",
      "|    explained_variance   | 0.743       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 251         |\n",
      "|    n_updates            | 6510        |\n",
      "|    policy_gradient_loss | -0.0021     |\n",
      "|    value_loss           | 442         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.93e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 653          |\n",
      "|    time_elapsed         | 23172        |\n",
      "|    total_timesteps      | 1337344      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0018588059 |\n",
      "|    clip_fraction        | 0.0264       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.43        |\n",
      "|    explained_variance   | 0.716        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 151          |\n",
      "|    n_updates            | 6520         |\n",
      "|    policy_gradient_loss | -0.00212     |\n",
      "|    value_loss           | 661          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.93e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 654          |\n",
      "|    time_elapsed         | 23211        |\n",
      "|    total_timesteps      | 1339392      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0018500917 |\n",
      "|    clip_fraction        | 0.0147       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.415       |\n",
      "|    explained_variance   | 0.768        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 96.8         |\n",
      "|    n_updates            | 6530         |\n",
      "|    policy_gradient_loss | -0.00207     |\n",
      "|    value_loss           | 392          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.04e+03      |\n",
      "|    ep_rew_mean          | 1.87e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 655           |\n",
      "|    time_elapsed         | 23251         |\n",
      "|    total_timesteps      | 1341440       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00057735445 |\n",
      "|    clip_fraction        | 0.00112       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.383        |\n",
      "|    explained_variance   | 0.805         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 129           |\n",
      "|    n_updates            | 6540          |\n",
      "|    policy_gradient_loss | -0.000324     |\n",
      "|    value_loss           | 404           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.05e+03     |\n",
      "|    ep_rew_mean          | 1.88e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 656          |\n",
      "|    time_elapsed         | 23292        |\n",
      "|    total_timesteps      | 1343488      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011875965 |\n",
      "|    clip_fraction        | 0.0019       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.362       |\n",
      "|    explained_variance   | 0.745        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 189          |\n",
      "|    n_updates            | 6550         |\n",
      "|    policy_gradient_loss | -0.000435    |\n",
      "|    value_loss           | 538          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.04e+03    |\n",
      "|    ep_rew_mean          | 1.88e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 657         |\n",
      "|    time_elapsed         | 23330       |\n",
      "|    total_timesteps      | 1345536     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001284888 |\n",
      "|    clip_fraction        | 0.00386     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.344      |\n",
      "|    explained_variance   | 0.766       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 153         |\n",
      "|    n_updates            | 6560        |\n",
      "|    policy_gradient_loss | -0.000823   |\n",
      "|    value_loss           | 478         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.05e+03     |\n",
      "|    ep_rew_mean          | 1.87e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 658          |\n",
      "|    time_elapsed         | 23370        |\n",
      "|    total_timesteps      | 1347584      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012908995 |\n",
      "|    clip_fraction        | 0.0118       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.356       |\n",
      "|    explained_variance   | 0.743        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 133          |\n",
      "|    n_updates            | 6570         |\n",
      "|    policy_gradient_loss | -0.000774    |\n",
      "|    value_loss           | 563          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.05e+03      |\n",
      "|    ep_rew_mean          | 1.87e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 659           |\n",
      "|    time_elapsed         | 23411         |\n",
      "|    total_timesteps      | 1349632       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00068808097 |\n",
      "|    clip_fraction        | 0.00356       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.312        |\n",
      "|    explained_variance   | 0.8           |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 250           |\n",
      "|    n_updates            | 6580          |\n",
      "|    policy_gradient_loss | -0.000434     |\n",
      "|    value_loss           | 462           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.04e+03     |\n",
      "|    ep_rew_mean          | 1.86e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 660          |\n",
      "|    time_elapsed         | 23450        |\n",
      "|    total_timesteps      | 1351680      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007866141 |\n",
      "|    clip_fraction        | 0.000488     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.304       |\n",
      "|    explained_variance   | 0.705        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 166          |\n",
      "|    n_updates            | 6590         |\n",
      "|    policy_gradient_loss | -0.00119     |\n",
      "|    value_loss           | 569          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.05e+03    |\n",
      "|    ep_rew_mean          | 1.87e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 661         |\n",
      "|    time_elapsed         | 23490       |\n",
      "|    total_timesteps      | 1353728     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001378576 |\n",
      "|    clip_fraction        | 0.0232      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.276      |\n",
      "|    explained_variance   | 0.772       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 115         |\n",
      "|    n_updates            | 6600        |\n",
      "|    policy_gradient_loss | -0.000572   |\n",
      "|    value_loss           | 496         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.05e+03     |\n",
      "|    ep_rew_mean          | 1.87e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 662          |\n",
      "|    time_elapsed         | 23528        |\n",
      "|    total_timesteps      | 1355776      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006875453 |\n",
      "|    clip_fraction        | 0.00498      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.271       |\n",
      "|    explained_variance   | 0.758        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 306          |\n",
      "|    n_updates            | 6610         |\n",
      "|    policy_gradient_loss | -0.00113     |\n",
      "|    value_loss           | 422          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.05e+03    |\n",
      "|    ep_rew_mean          | 1.89e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 663         |\n",
      "|    time_elapsed         | 23568       |\n",
      "|    total_timesteps      | 1357824     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.000621237 |\n",
      "|    clip_fraction        | 0.00703     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.316      |\n",
      "|    explained_variance   | 0.774       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 205         |\n",
      "|    n_updates            | 6620        |\n",
      "|    policy_gradient_loss | -0.0011     |\n",
      "|    value_loss           | 363         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.05e+03     |\n",
      "|    ep_rew_mean          | 1.89e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 664          |\n",
      "|    time_elapsed         | 23609        |\n",
      "|    total_timesteps      | 1359872      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0004984855 |\n",
      "|    clip_fraction        | 0.00181      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.318       |\n",
      "|    explained_variance   | 0.579        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 345          |\n",
      "|    n_updates            | 6630         |\n",
      "|    policy_gradient_loss | -0.000449    |\n",
      "|    value_loss           | 538          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.05e+03     |\n",
      "|    ep_rew_mean          | 1.89e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 665          |\n",
      "|    time_elapsed         | 23649        |\n",
      "|    total_timesteps      | 1361920      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010286628 |\n",
      "|    clip_fraction        | 0.00269      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.329       |\n",
      "|    explained_variance   | 0.679        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 371          |\n",
      "|    n_updates            | 6640         |\n",
      "|    policy_gradient_loss | -0.000993    |\n",
      "|    value_loss           | 516          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.06e+03      |\n",
      "|    ep_rew_mean          | 1.88e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 666           |\n",
      "|    time_elapsed         | 23686         |\n",
      "|    total_timesteps      | 1363968       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00095645327 |\n",
      "|    clip_fraction        | 0.00576       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.369        |\n",
      "|    explained_variance   | 0.63          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 307           |\n",
      "|    n_updates            | 6650          |\n",
      "|    policy_gradient_loss | -0.000309     |\n",
      "|    value_loss           | 519           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.05e+03     |\n",
      "|    ep_rew_mean          | 1.87e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 667          |\n",
      "|    time_elapsed         | 23725        |\n",
      "|    total_timesteps      | 1366016      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0017134487 |\n",
      "|    clip_fraction        | 0.0234       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.424       |\n",
      "|    explained_variance   | 0.714        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 287          |\n",
      "|    n_updates            | 6660         |\n",
      "|    policy_gradient_loss | -0.0018      |\n",
      "|    value_loss           | 463          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.05e+03      |\n",
      "|    ep_rew_mean          | 1.87e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 668           |\n",
      "|    time_elapsed         | 23765         |\n",
      "|    total_timesteps      | 1368064       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00094091595 |\n",
      "|    clip_fraction        | 0.00205       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.422        |\n",
      "|    explained_variance   | 0.761         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 92.2          |\n",
      "|    n_updates            | 6670          |\n",
      "|    policy_gradient_loss | -0.000542     |\n",
      "|    value_loss           | 457           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.05e+03      |\n",
      "|    ep_rew_mean          | 1.87e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 669           |\n",
      "|    time_elapsed         | 23807         |\n",
      "|    total_timesteps      | 1370112       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00045968406 |\n",
      "|    clip_fraction        | 9.77e-05      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.371        |\n",
      "|    explained_variance   | 0.729         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 127           |\n",
      "|    n_updates            | 6680          |\n",
      "|    policy_gradient_loss | -0.000136     |\n",
      "|    value_loss           | 466           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.04e+03     |\n",
      "|    ep_rew_mean          | 1.86e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 670          |\n",
      "|    time_elapsed         | 23846        |\n",
      "|    total_timesteps      | 1372160      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012501073 |\n",
      "|    clip_fraction        | 0.00498      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.405       |\n",
      "|    explained_variance   | 0.684        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 311          |\n",
      "|    n_updates            | 6690         |\n",
      "|    policy_gradient_loss | -0.000379    |\n",
      "|    value_loss           | 514          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.04e+03     |\n",
      "|    ep_rew_mean          | 1.86e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 671          |\n",
      "|    time_elapsed         | 23885        |\n",
      "|    total_timesteps      | 1374208      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006231752 |\n",
      "|    clip_fraction        | 0.00146      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.432       |\n",
      "|    explained_variance   | 0.754        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 285          |\n",
      "|    n_updates            | 6700         |\n",
      "|    policy_gradient_loss | -0.000715    |\n",
      "|    value_loss           | 495          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.05e+03     |\n",
      "|    ep_rew_mean          | 1.86e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 672          |\n",
      "|    time_elapsed         | 23924        |\n",
      "|    total_timesteps      | 1376256      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0020155774 |\n",
      "|    clip_fraction        | 0.0127       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.445       |\n",
      "|    explained_variance   | 0.706        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 354          |\n",
      "|    n_updates            | 6710         |\n",
      "|    policy_gradient_loss | -8.61e-05    |\n",
      "|    value_loss           | 427          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.06e+03     |\n",
      "|    ep_rew_mean          | 1.87e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 673          |\n",
      "|    time_elapsed         | 23964        |\n",
      "|    total_timesteps      | 1378304      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0020008315 |\n",
      "|    clip_fraction        | 0.00454      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.441       |\n",
      "|    explained_variance   | 0.824        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 119          |\n",
      "|    n_updates            | 6720         |\n",
      "|    policy_gradient_loss | -0.000418    |\n",
      "|    value_loss           | 356          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.06e+03      |\n",
      "|    ep_rew_mean          | 1.88e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 674           |\n",
      "|    time_elapsed         | 24004         |\n",
      "|    total_timesteps      | 1380352       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00013832591 |\n",
      "|    clip_fraction        | 0.000488      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.417        |\n",
      "|    explained_variance   | 0.727         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 279           |\n",
      "|    n_updates            | 6730          |\n",
      "|    policy_gradient_loss | -0.000484     |\n",
      "|    value_loss           | 571           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.07e+03     |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 675          |\n",
      "|    time_elapsed         | 24043        |\n",
      "|    total_timesteps      | 1382400      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0004638823 |\n",
      "|    clip_fraction        | 0.000391     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.387       |\n",
      "|    explained_variance   | 0.796        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 335          |\n",
      "|    n_updates            | 6740         |\n",
      "|    policy_gradient_loss | 0.000102     |\n",
      "|    value_loss           | 362          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.07e+03     |\n",
      "|    ep_rew_mean          | 1.89e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 676          |\n",
      "|    time_elapsed         | 24082        |\n",
      "|    total_timesteps      | 1384448      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0009769838 |\n",
      "|    clip_fraction        | 0.0183       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.397       |\n",
      "|    explained_variance   | 0.656        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 245          |\n",
      "|    n_updates            | 6750         |\n",
      "|    policy_gradient_loss | -0.00159     |\n",
      "|    value_loss           | 552          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.06e+03     |\n",
      "|    ep_rew_mean          | 1.89e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 677          |\n",
      "|    time_elapsed         | 24120        |\n",
      "|    total_timesteps      | 1386496      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0013171574 |\n",
      "|    clip_fraction        | 0.0215       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.401       |\n",
      "|    explained_variance   | 0.703        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 142          |\n",
      "|    n_updates            | 6760         |\n",
      "|    policy_gradient_loss | -0.00232     |\n",
      "|    value_loss           | 557          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.07e+03     |\n",
      "|    ep_rew_mean          | 1.89e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 678          |\n",
      "|    time_elapsed         | 24161        |\n",
      "|    total_timesteps      | 1388544      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0018776977 |\n",
      "|    clip_fraction        | 0.00698      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.385       |\n",
      "|    explained_variance   | 0.701        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 332          |\n",
      "|    n_updates            | 6770         |\n",
      "|    policy_gradient_loss | -0.000944    |\n",
      "|    value_loss           | 493          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.07e+03     |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 679          |\n",
      "|    time_elapsed         | 24200        |\n",
      "|    total_timesteps      | 1390592      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0003677551 |\n",
      "|    clip_fraction        | 0.000391     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.361       |\n",
      "|    explained_variance   | 0.78         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 101          |\n",
      "|    n_updates            | 6780         |\n",
      "|    policy_gradient_loss | -0.00011     |\n",
      "|    value_loss           | 454          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.08e+03      |\n",
      "|    ep_rew_mean          | 1.9e+03       |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 680           |\n",
      "|    time_elapsed         | 24239         |\n",
      "|    total_timesteps      | 1392640       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00085202797 |\n",
      "|    clip_fraction        | 0.00293       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.339        |\n",
      "|    explained_variance   | 0.858         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 154           |\n",
      "|    n_updates            | 6790          |\n",
      "|    policy_gradient_loss | -0.000318     |\n",
      "|    value_loss           | 321           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.09e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 681           |\n",
      "|    time_elapsed         | 24279         |\n",
      "|    total_timesteps      | 1394688       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00086736935 |\n",
      "|    clip_fraction        | 0.00864       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.327        |\n",
      "|    explained_variance   | 0.835         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 130           |\n",
      "|    n_updates            | 6800          |\n",
      "|    policy_gradient_loss | -0.000933     |\n",
      "|    value_loss           | 406           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.09e+03     |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 682          |\n",
      "|    time_elapsed         | 24317        |\n",
      "|    total_timesteps      | 1396736      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014388495 |\n",
      "|    clip_fraction        | 0.00845      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.346       |\n",
      "|    explained_variance   | 0.692        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 234          |\n",
      "|    n_updates            | 6810         |\n",
      "|    policy_gradient_loss | -0.00117     |\n",
      "|    value_loss           | 522          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.09e+03     |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 683          |\n",
      "|    time_elapsed         | 24360        |\n",
      "|    total_timesteps      | 1398784      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0002917931 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.288       |\n",
      "|    explained_variance   | 0.836        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 112          |\n",
      "|    n_updates            | 6820         |\n",
      "|    policy_gradient_loss | -0.000355    |\n",
      "|    value_loss           | 347          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 684          |\n",
      "|    time_elapsed         | 24400        |\n",
      "|    total_timesteps      | 1400832      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010825557 |\n",
      "|    clip_fraction        | 0.0199       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.326       |\n",
      "|    explained_variance   | 0.737        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 324          |\n",
      "|    n_updates            | 6830         |\n",
      "|    policy_gradient_loss | -0.00195     |\n",
      "|    value_loss           | 480          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.1e+03       |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 685           |\n",
      "|    time_elapsed         | 24436         |\n",
      "|    total_timesteps      | 1402880       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00096181163 |\n",
      "|    clip_fraction        | 0.00303       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.397        |\n",
      "|    explained_variance   | 0.75          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 295           |\n",
      "|    n_updates            | 6840          |\n",
      "|    policy_gradient_loss | -0.00112      |\n",
      "|    value_loss           | 470           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.93e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 686          |\n",
      "|    time_elapsed         | 24474        |\n",
      "|    total_timesteps      | 1404928      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011241063 |\n",
      "|    clip_fraction        | 0.00186      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.37        |\n",
      "|    explained_variance   | 0.553        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 129          |\n",
      "|    n_updates            | 6850         |\n",
      "|    policy_gradient_loss | -0.000983    |\n",
      "|    value_loss           | 746          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.96e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 687          |\n",
      "|    time_elapsed         | 24512        |\n",
      "|    total_timesteps      | 1406976      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011597273 |\n",
      "|    clip_fraction        | 0.0129       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.396       |\n",
      "|    explained_variance   | 0.763        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 187          |\n",
      "|    n_updates            | 6860         |\n",
      "|    policy_gradient_loss | -0.000951    |\n",
      "|    value_loss           | 512          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.14e+03     |\n",
      "|    ep_rew_mean          | 1.97e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 688          |\n",
      "|    time_elapsed         | 24550        |\n",
      "|    total_timesteps      | 1409024      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012802157 |\n",
      "|    clip_fraction        | 0.00532      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.498       |\n",
      "|    explained_variance   | 0.873        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 109          |\n",
      "|    n_updates            | 6870         |\n",
      "|    policy_gradient_loss | -0.000812    |\n",
      "|    value_loss           | 435          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.15e+03     |\n",
      "|    ep_rew_mean          | 1.99e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 689          |\n",
      "|    time_elapsed         | 24589        |\n",
      "|    total_timesteps      | 1411072      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012956521 |\n",
      "|    clip_fraction        | 0.00874      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.4         |\n",
      "|    explained_variance   | 0.837        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 41.8         |\n",
      "|    n_updates            | 6880         |\n",
      "|    policy_gradient_loss | -0.00118     |\n",
      "|    value_loss           | 289          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.14e+03     |\n",
      "|    ep_rew_mean          | 1.99e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 690          |\n",
      "|    time_elapsed         | 24627        |\n",
      "|    total_timesteps      | 1413120      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006137791 |\n",
      "|    clip_fraction        | 0.0084       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.437       |\n",
      "|    explained_variance   | 0.857        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 156          |\n",
      "|    n_updates            | 6890         |\n",
      "|    policy_gradient_loss | -0.00129     |\n",
      "|    value_loss           | 310          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.14e+03     |\n",
      "|    ep_rew_mean          | 1.97e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 691          |\n",
      "|    time_elapsed         | 24666        |\n",
      "|    total_timesteps      | 1415168      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014716565 |\n",
      "|    clip_fraction        | 0.00391      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.45        |\n",
      "|    explained_variance   | 0.679        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 667          |\n",
      "|    n_updates            | 6900         |\n",
      "|    policy_gradient_loss | -0.00121     |\n",
      "|    value_loss           | 622          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.96e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 692          |\n",
      "|    time_elapsed         | 24706        |\n",
      "|    total_timesteps      | 1417216      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005209986 |\n",
      "|    clip_fraction        | 0.00845      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.448       |\n",
      "|    explained_variance   | 0.631        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 161          |\n",
      "|    n_updates            | 6910         |\n",
      "|    policy_gradient_loss | 2.07e-05     |\n",
      "|    value_loss           | 625          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.13e+03      |\n",
      "|    ep_rew_mean          | 1.96e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 693           |\n",
      "|    time_elapsed         | 24746         |\n",
      "|    total_timesteps      | 1419264       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00034747124 |\n",
      "|    clip_fraction        | 0.00112       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.446        |\n",
      "|    explained_variance   | 0.695         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 93.8          |\n",
      "|    n_updates            | 6920          |\n",
      "|    policy_gradient_loss | -0.000148     |\n",
      "|    value_loss           | 557           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.96e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 694          |\n",
      "|    time_elapsed         | 24784        |\n",
      "|    total_timesteps      | 1421312      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014843107 |\n",
      "|    clip_fraction        | 0.0208       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.522       |\n",
      "|    explained_variance   | 0.83         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 117          |\n",
      "|    n_updates            | 6930         |\n",
      "|    policy_gradient_loss | -0.000992    |\n",
      "|    value_loss           | 331          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.96e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 695          |\n",
      "|    time_elapsed         | 24821        |\n",
      "|    total_timesteps      | 1423360      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006983036 |\n",
      "|    clip_fraction        | 0.00171      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.46        |\n",
      "|    explained_variance   | 0.778        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 105          |\n",
      "|    n_updates            | 6940         |\n",
      "|    policy_gradient_loss | -0.000446    |\n",
      "|    value_loss           | 471          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.97e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 696          |\n",
      "|    time_elapsed         | 24858        |\n",
      "|    total_timesteps      | 1425408      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0021895026 |\n",
      "|    clip_fraction        | 0.0119       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.484       |\n",
      "|    explained_variance   | 0.671        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 227          |\n",
      "|    n_updates            | 6950         |\n",
      "|    policy_gradient_loss | -0.00102     |\n",
      "|    value_loss           | 601          |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 1.12e+03   |\n",
      "|    ep_rew_mean          | 1.94e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 57         |\n",
      "|    iterations           | 697        |\n",
      "|    time_elapsed         | 24898      |\n",
      "|    total_timesteps      | 1427456    |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.00062353 |\n",
      "|    clip_fraction        | 0.00151    |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -0.381     |\n",
      "|    explained_variance   | 0.859      |\n",
      "|    learning_rate        | 1e-06      |\n",
      "|    loss                 | 153        |\n",
      "|    n_updates            | 6960       |\n",
      "|    policy_gradient_loss | -0.000203  |\n",
      "|    value_loss           | 308        |\n",
      "----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.13e+03      |\n",
      "|    ep_rew_mean          | 1.95e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 698           |\n",
      "|    time_elapsed         | 24936         |\n",
      "|    total_timesteps      | 1429504       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00040725095 |\n",
      "|    clip_fraction        | 9.77e-05      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.366        |\n",
      "|    explained_variance   | 0.797         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 108           |\n",
      "|    n_updates            | 6970          |\n",
      "|    policy_gradient_loss | -0.000476     |\n",
      "|    value_loss           | 575           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.14e+03      |\n",
      "|    ep_rew_mean          | 1.97e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 699           |\n",
      "|    time_elapsed         | 24974         |\n",
      "|    total_timesteps      | 1431552       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00017661188 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.376        |\n",
      "|    explained_variance   | 0.673         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 610           |\n",
      "|    n_updates            | 6980          |\n",
      "|    policy_gradient_loss | 4.12e-05      |\n",
      "|    value_loss           | 637           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.96e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 700          |\n",
      "|    time_elapsed         | 25012        |\n",
      "|    total_timesteps      | 1433600      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011378534 |\n",
      "|    clip_fraction        | 0.0135       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.4         |\n",
      "|    explained_variance   | 0.726        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 317          |\n",
      "|    n_updates            | 6990         |\n",
      "|    policy_gradient_loss | -0.00125     |\n",
      "|    value_loss           | 461          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.95e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 701          |\n",
      "|    time_elapsed         | 25051        |\n",
      "|    total_timesteps      | 1435648      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0021649743 |\n",
      "|    clip_fraction        | 0.0101       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.38        |\n",
      "|    explained_variance   | 0.681        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 104          |\n",
      "|    n_updates            | 7000         |\n",
      "|    policy_gradient_loss | -0.000891    |\n",
      "|    value_loss           | 505          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 702          |\n",
      "|    time_elapsed         | 25091        |\n",
      "|    total_timesteps      | 1437696      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011866621 |\n",
      "|    clip_fraction        | 0.0278       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.453       |\n",
      "|    explained_variance   | 0.667        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 218          |\n",
      "|    n_updates            | 7010         |\n",
      "|    policy_gradient_loss | -0.00105     |\n",
      "|    value_loss           | 588          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.09e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 703          |\n",
      "|    time_elapsed         | 25130        |\n",
      "|    total_timesteps      | 1439744      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005195108 |\n",
      "|    clip_fraction        | 0.00195      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.399       |\n",
      "|    explained_variance   | 0.695        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 356          |\n",
      "|    n_updates            | 7020         |\n",
      "|    policy_gradient_loss | -0.000523    |\n",
      "|    value_loss           | 562          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.09e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 704           |\n",
      "|    time_elapsed         | 25169         |\n",
      "|    total_timesteps      | 1441792       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00034283398 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.408        |\n",
      "|    explained_variance   | 0.509         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 275           |\n",
      "|    n_updates            | 7030          |\n",
      "|    policy_gradient_loss | 0.0003        |\n",
      "|    value_loss           | 512           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.08e+03     |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 705          |\n",
      "|    time_elapsed         | 25205        |\n",
      "|    total_timesteps      | 1443840      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007013325 |\n",
      "|    clip_fraction        | 0.00195      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.425       |\n",
      "|    explained_variance   | 0.702        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 215          |\n",
      "|    n_updates            | 7040         |\n",
      "|    policy_gradient_loss | -0.000642    |\n",
      "|    value_loss           | 489          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.09e+03     |\n",
      "|    ep_rew_mean          | 1.93e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 706          |\n",
      "|    time_elapsed         | 25243        |\n",
      "|    total_timesteps      | 1445888      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0026082573 |\n",
      "|    clip_fraction        | 0.0196       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.422       |\n",
      "|    explained_variance   | 0.777        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 199          |\n",
      "|    n_updates            | 7050         |\n",
      "|    policy_gradient_loss | -0.00187     |\n",
      "|    value_loss           | 438          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.08e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 707          |\n",
      "|    time_elapsed         | 25282        |\n",
      "|    total_timesteps      | 1447936      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007082843 |\n",
      "|    clip_fraction        | 0.00459      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.356       |\n",
      "|    explained_variance   | 0.736        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 491          |\n",
      "|    n_updates            | 7060         |\n",
      "|    policy_gradient_loss | -0.000425    |\n",
      "|    value_loss           | 476          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.93e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 708          |\n",
      "|    time_elapsed         | 25321        |\n",
      "|    total_timesteps      | 1449984      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005006846 |\n",
      "|    clip_fraction        | 0.00229      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.387       |\n",
      "|    explained_variance   | 0.775        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 317          |\n",
      "|    n_updates            | 7070         |\n",
      "|    policy_gradient_loss | -0.00055     |\n",
      "|    value_loss           | 442          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.95e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 709          |\n",
      "|    time_elapsed         | 25361        |\n",
      "|    total_timesteps      | 1452032      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014041876 |\n",
      "|    clip_fraction        | 0.0124       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.423       |\n",
      "|    explained_variance   | 0.753        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 253          |\n",
      "|    n_updates            | 7080         |\n",
      "|    policy_gradient_loss | -0.000336    |\n",
      "|    value_loss           | 436          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.95e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 710           |\n",
      "|    time_elapsed         | 25399         |\n",
      "|    total_timesteps      | 1454080       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00031057466 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.457        |\n",
      "|    explained_variance   | 0.697         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 321           |\n",
      "|    n_updates            | 7090          |\n",
      "|    policy_gradient_loss | -0.000252     |\n",
      "|    value_loss           | 587           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.95e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 711           |\n",
      "|    time_elapsed         | 25441         |\n",
      "|    total_timesteps      | 1456128       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00035281954 |\n",
      "|    clip_fraction        | 0.000488      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.451        |\n",
      "|    explained_variance   | 0.702         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 82.9          |\n",
      "|    n_updates            | 7100          |\n",
      "|    policy_gradient_loss | -0.000683     |\n",
      "|    value_loss           | 552           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.1e+03       |\n",
      "|    ep_rew_mean          | 1.94e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 712           |\n",
      "|    time_elapsed         | 25480         |\n",
      "|    total_timesteps      | 1458176       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00067710015 |\n",
      "|    clip_fraction        | 0.000342      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.438        |\n",
      "|    explained_variance   | 0.609         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 295           |\n",
      "|    n_updates            | 7110          |\n",
      "|    policy_gradient_loss | -0.000193     |\n",
      "|    value_loss           | 574           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.95e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 713          |\n",
      "|    time_elapsed         | 25521        |\n",
      "|    total_timesteps      | 1460224      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007923882 |\n",
      "|    clip_fraction        | 0.00767      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.455       |\n",
      "|    explained_variance   | 0.752        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 256          |\n",
      "|    n_updates            | 7120         |\n",
      "|    policy_gradient_loss | -0.000444    |\n",
      "|    value_loss           | 445          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.94e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 714          |\n",
      "|    time_elapsed         | 25560        |\n",
      "|    total_timesteps      | 1462272      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0020101848 |\n",
      "|    clip_fraction        | 0.0133       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.42        |\n",
      "|    explained_variance   | 0.68         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 446          |\n",
      "|    n_updates            | 7130         |\n",
      "|    policy_gradient_loss | -0.00178     |\n",
      "|    value_loss           | 498          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.94e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 715          |\n",
      "|    time_elapsed         | 25598        |\n",
      "|    total_timesteps      | 1464320      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011767442 |\n",
      "|    clip_fraction        | 0.0204       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.418       |\n",
      "|    explained_variance   | 0.699        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 143          |\n",
      "|    n_updates            | 7140         |\n",
      "|    policy_gradient_loss | -0.00274     |\n",
      "|    value_loss           | 511          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.1e+03     |\n",
      "|    ep_rew_mean          | 1.94e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 716         |\n",
      "|    time_elapsed         | 25639       |\n",
      "|    total_timesteps      | 1466368     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.000566168 |\n",
      "|    clip_fraction        | 4.88e-05    |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.442      |\n",
      "|    explained_variance   | 0.782       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 311         |\n",
      "|    n_updates            | 7150        |\n",
      "|    policy_gradient_loss | -0.000302   |\n",
      "|    value_loss           | 509         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.1e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 717         |\n",
      "|    time_elapsed         | 25679       |\n",
      "|    total_timesteps      | 1468416     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001333591 |\n",
      "|    clip_fraction        | 0.00596     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.388      |\n",
      "|    explained_variance   | 0.843       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 199         |\n",
      "|    n_updates            | 7160        |\n",
      "|    policy_gradient_loss | 0.000199    |\n",
      "|    value_loss           | 243         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 718          |\n",
      "|    time_elapsed         | 25718        |\n",
      "|    total_timesteps      | 1470464      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0003698816 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.356       |\n",
      "|    explained_variance   | 0.763        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 147          |\n",
      "|    n_updates            | 7170         |\n",
      "|    policy_gradient_loss | -0.00055     |\n",
      "|    value_loss           | 485          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 719           |\n",
      "|    time_elapsed         | 25756         |\n",
      "|    total_timesteps      | 1472512       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00069945655 |\n",
      "|    clip_fraction        | 0.000244      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.333        |\n",
      "|    explained_variance   | 0.833         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 216           |\n",
      "|    n_updates            | 7180          |\n",
      "|    policy_gradient_loss | -0.000164     |\n",
      "|    value_loss           | 309           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 720          |\n",
      "|    time_elapsed         | 25794        |\n",
      "|    total_timesteps      | 1474560      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0004013855 |\n",
      "|    clip_fraction        | 0.00103      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.323       |\n",
      "|    explained_variance   | 0.749        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 664          |\n",
      "|    n_updates            | 7190         |\n",
      "|    policy_gradient_loss | -0.00077     |\n",
      "|    value_loss           | 529          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.1e+03       |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 721           |\n",
      "|    time_elapsed         | 25833         |\n",
      "|    total_timesteps      | 1476608       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00039457632 |\n",
      "|    clip_fraction        | 9.77e-05      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.342        |\n",
      "|    explained_variance   | 0.792         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 286           |\n",
      "|    n_updates            | 7200          |\n",
      "|    policy_gradient_loss | -0.000374     |\n",
      "|    value_loss           | 491           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.1e+03       |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 722           |\n",
      "|    time_elapsed         | 25872         |\n",
      "|    total_timesteps      | 1478656       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00060110283 |\n",
      "|    clip_fraction        | 0.00166       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.401        |\n",
      "|    explained_variance   | 0.745         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 170           |\n",
      "|    n_updates            | 7210          |\n",
      "|    policy_gradient_loss | -0.00011      |\n",
      "|    value_loss           | 537           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.1e+03       |\n",
      "|    ep_rew_mean          | 1.9e+03       |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 723           |\n",
      "|    time_elapsed         | 25911         |\n",
      "|    total_timesteps      | 1480704       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00094309077 |\n",
      "|    clip_fraction        | 0.00874       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.385        |\n",
      "|    explained_variance   | 0.85          |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 127           |\n",
      "|    n_updates            | 7220          |\n",
      "|    policy_gradient_loss | -0.00102      |\n",
      "|    value_loss           | 360           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 724          |\n",
      "|    time_elapsed         | 25949        |\n",
      "|    total_timesteps      | 1482752      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0003391711 |\n",
      "|    clip_fraction        | 0.000195     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.398       |\n",
      "|    explained_variance   | 0.823        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 182          |\n",
      "|    n_updates            | 7230         |\n",
      "|    policy_gradient_loss | -0.000286    |\n",
      "|    value_loss           | 407          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.08e+03     |\n",
      "|    ep_rew_mean          | 1.89e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 725          |\n",
      "|    time_elapsed         | 25987        |\n",
      "|    total_timesteps      | 1484800      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0013331705 |\n",
      "|    clip_fraction        | 0.0159       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.403       |\n",
      "|    explained_variance   | 0.819        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 621          |\n",
      "|    n_updates            | 7240         |\n",
      "|    policy_gradient_loss | -0.00156     |\n",
      "|    value_loss           | 461          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.09e+03     |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 726          |\n",
      "|    time_elapsed         | 26026        |\n",
      "|    total_timesteps      | 1486848      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005774632 |\n",
      "|    clip_fraction        | 0.00229      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.444       |\n",
      "|    explained_variance   | 0.724        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 474          |\n",
      "|    n_updates            | 7250         |\n",
      "|    policy_gradient_loss | -0.000485    |\n",
      "|    value_loss           | 607          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.09e+03     |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 727          |\n",
      "|    time_elapsed         | 26065        |\n",
      "|    total_timesteps      | 1488896      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007924993 |\n",
      "|    clip_fraction        | 0.0041       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.44        |\n",
      "|    explained_variance   | 0.777        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 213          |\n",
      "|    n_updates            | 7260         |\n",
      "|    policy_gradient_loss | -0.00074     |\n",
      "|    value_loss           | 514          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.09e+03    |\n",
      "|    ep_rew_mean          | 1.89e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 728         |\n",
      "|    time_elapsed         | 26107       |\n",
      "|    total_timesteps      | 1490944     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001422498 |\n",
      "|    clip_fraction        | 0.00327     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.424      |\n",
      "|    explained_variance   | 0.872       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 82.7        |\n",
      "|    n_updates            | 7270        |\n",
      "|    policy_gradient_loss | -0.000415   |\n",
      "|    value_loss           | 313         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.09e+03     |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 729          |\n",
      "|    time_elapsed         | 26145        |\n",
      "|    total_timesteps      | 1492992      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010365145 |\n",
      "|    clip_fraction        | 0.0216       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.488       |\n",
      "|    explained_variance   | 0.74         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 116          |\n",
      "|    n_updates            | 7280         |\n",
      "|    policy_gradient_loss | -0.000434    |\n",
      "|    value_loss           | 553          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.09e+03     |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 730          |\n",
      "|    time_elapsed         | 26186        |\n",
      "|    total_timesteps      | 1495040      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011535073 |\n",
      "|    clip_fraction        | 0.00356      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.506       |\n",
      "|    explained_variance   | 0.794        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 141          |\n",
      "|    n_updates            | 7290         |\n",
      "|    policy_gradient_loss | -0.000682    |\n",
      "|    value_loss           | 483          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.09e+03     |\n",
      "|    ep_rew_mean          | 1.89e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 731          |\n",
      "|    time_elapsed         | 26225        |\n",
      "|    total_timesteps      | 1497088      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0004614385 |\n",
      "|    clip_fraction        | 4.88e-05     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.489       |\n",
      "|    explained_variance   | 0.869        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 118          |\n",
      "|    n_updates            | 7300         |\n",
      "|    policy_gradient_loss | -2.72e-05    |\n",
      "|    value_loss           | 345          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 732          |\n",
      "|    time_elapsed         | 26265        |\n",
      "|    total_timesteps      | 1499136      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0017598403 |\n",
      "|    clip_fraction        | 0.00454      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.484       |\n",
      "|    explained_variance   | 0.693        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 102          |\n",
      "|    n_updates            | 7310         |\n",
      "|    policy_gradient_loss | -0.00117     |\n",
      "|    value_loss           | 545          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 733          |\n",
      "|    time_elapsed         | 26306        |\n",
      "|    total_timesteps      | 1501184      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012667067 |\n",
      "|    clip_fraction        | 0.0082       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.397       |\n",
      "|    explained_variance   | 0.855        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 143          |\n",
      "|    n_updates            | 7320         |\n",
      "|    policy_gradient_loss | -0.00112     |\n",
      "|    value_loss           | 258          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.93e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 734          |\n",
      "|    time_elapsed         | 26343        |\n",
      "|    total_timesteps      | 1503232      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0009078777 |\n",
      "|    clip_fraction        | 0.00415      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.45        |\n",
      "|    explained_variance   | 0.878        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 140          |\n",
      "|    n_updates            | 7330         |\n",
      "|    policy_gradient_loss | -0.001       |\n",
      "|    value_loss           | 453          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.11e+03    |\n",
      "|    ep_rew_mean          | 1.92e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 735         |\n",
      "|    time_elapsed         | 26383       |\n",
      "|    total_timesteps      | 1505280     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001833647 |\n",
      "|    clip_fraction        | 0.0202      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.504      |\n",
      "|    explained_variance   | 0.892       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 296         |\n",
      "|    n_updates            | 7340        |\n",
      "|    policy_gradient_loss | -0.00108    |\n",
      "|    value_loss           | 398         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.09e+03     |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 736          |\n",
      "|    time_elapsed         | 26422        |\n",
      "|    total_timesteps      | 1507328      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005882438 |\n",
      "|    clip_fraction        | 0.00298      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.46        |\n",
      "|    explained_variance   | 0.83         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 175          |\n",
      "|    n_updates            | 7350         |\n",
      "|    policy_gradient_loss | -0.000899    |\n",
      "|    value_loss           | 378          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.09e+03     |\n",
      "|    ep_rew_mean          | 1.89e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 737          |\n",
      "|    time_elapsed         | 26461        |\n",
      "|    total_timesteps      | 1509376      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0026519762 |\n",
      "|    clip_fraction        | 0.0614       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.525       |\n",
      "|    explained_variance   | 0.71         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 182          |\n",
      "|    n_updates            | 7360         |\n",
      "|    policy_gradient_loss | -0.00447     |\n",
      "|    value_loss           | 561          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.09e+03    |\n",
      "|    ep_rew_mean          | 1.88e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 57          |\n",
      "|    iterations           | 738         |\n",
      "|    time_elapsed         | 26502       |\n",
      "|    total_timesteps      | 1511424     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001405355 |\n",
      "|    clip_fraction        | 0.00435     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.519      |\n",
      "|    explained_variance   | 0.856       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 196         |\n",
      "|    n_updates            | 7370        |\n",
      "|    policy_gradient_loss | -0.000228   |\n",
      "|    value_loss           | 439         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.09e+03     |\n",
      "|    ep_rew_mean          | 1.89e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 739          |\n",
      "|    time_elapsed         | 26540        |\n",
      "|    total_timesteps      | 1513472      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010774506 |\n",
      "|    clip_fraction        | 4.88e-05     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.498       |\n",
      "|    explained_variance   | 0.787        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 165          |\n",
      "|    n_updates            | 7380         |\n",
      "|    policy_gradient_loss | -0.000141    |\n",
      "|    value_loss           | 436          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.09e+03     |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 740          |\n",
      "|    time_elapsed         | 26577        |\n",
      "|    total_timesteps      | 1515520      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0013398873 |\n",
      "|    clip_fraction        | 0.00547      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.431       |\n",
      "|    explained_variance   | 0.84         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 199          |\n",
      "|    n_updates            | 7390         |\n",
      "|    policy_gradient_loss | -0.000915    |\n",
      "|    value_loss           | 334          |\n",
      "------------------------------------------\n",
      "--------------------------------------------\n",
      "| rollout/                |                |\n",
      "|    ep_len_mean          | 1.08e+03       |\n",
      "|    ep_rew_mean          | 1.88e+03       |\n",
      "| time/                   |                |\n",
      "|    fps                  | 57             |\n",
      "|    iterations           | 741            |\n",
      "|    time_elapsed         | 26615          |\n",
      "|    total_timesteps      | 1517568        |\n",
      "| train/                  |                |\n",
      "|    approx_kl            | 0.000116067764 |\n",
      "|    clip_fraction        | 0              |\n",
      "|    clip_range           | 0.2            |\n",
      "|    entropy_loss         | -0.384         |\n",
      "|    explained_variance   | 0.756          |\n",
      "|    learning_rate        | 1e-06          |\n",
      "|    loss                 | 274            |\n",
      "|    n_updates            | 7400           |\n",
      "|    policy_gradient_loss | 0.000123       |\n",
      "|    value_loss           | 394            |\n",
      "--------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.07e+03      |\n",
      "|    ep_rew_mean          | 1.86e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 57            |\n",
      "|    iterations           | 742           |\n",
      "|    time_elapsed         | 26653         |\n",
      "|    total_timesteps      | 1519616       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00096103473 |\n",
      "|    clip_fraction        | 0.00225       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.358        |\n",
      "|    explained_variance   | 0.855         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 234           |\n",
      "|    n_updates            | 7410          |\n",
      "|    policy_gradient_loss | -0.000262     |\n",
      "|    value_loss           | 358           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.08e+03     |\n",
      "|    ep_rew_mean          | 1.87e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 743          |\n",
      "|    time_elapsed         | 26692        |\n",
      "|    total_timesteps      | 1521664      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007187837 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.341       |\n",
      "|    explained_variance   | 0.778        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 312          |\n",
      "|    n_updates            | 7420         |\n",
      "|    policy_gradient_loss | -0.000338    |\n",
      "|    value_loss           | 531          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.08e+03     |\n",
      "|    ep_rew_mean          | 1.88e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 57           |\n",
      "|    iterations           | 744          |\n",
      "|    time_elapsed         | 26730        |\n",
      "|    total_timesteps      | 1523712      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014337362 |\n",
      "|    clip_fraction        | 0.0139       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.36        |\n",
      "|    explained_variance   | 0.816        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 226          |\n",
      "|    n_updates            | 7430         |\n",
      "|    policy_gradient_loss | -0.00131     |\n",
      "|    value_loss           | 452          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.08e+03      |\n",
      "|    ep_rew_mean          | 1.88e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 745           |\n",
      "|    time_elapsed         | 26770         |\n",
      "|    total_timesteps      | 1525760       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00050082215 |\n",
      "|    clip_fraction        | 4.88e-05      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.376        |\n",
      "|    explained_variance   | 0.822         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 105           |\n",
      "|    n_updates            | 7440          |\n",
      "|    policy_gradient_loss | -0.000202     |\n",
      "|    value_loss           | 448           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.08e+03      |\n",
      "|    ep_rew_mean          | 1.87e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 746           |\n",
      "|    time_elapsed         | 26810         |\n",
      "|    total_timesteps      | 1527808       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00048187256 |\n",
      "|    clip_fraction        | 0.00244       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.413        |\n",
      "|    explained_variance   | 0.676         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 85.9          |\n",
      "|    n_updates            | 7450          |\n",
      "|    policy_gradient_loss | -0.000773     |\n",
      "|    value_loss           | 537           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.07e+03     |\n",
      "|    ep_rew_mean          | 1.87e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 747          |\n",
      "|    time_elapsed         | 26846        |\n",
      "|    total_timesteps      | 1529856      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011413019 |\n",
      "|    clip_fraction        | 0.00664      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.367       |\n",
      "|    explained_variance   | 0.743        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 226          |\n",
      "|    n_updates            | 7460         |\n",
      "|    policy_gradient_loss | -0.00153     |\n",
      "|    value_loss           | 544          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.06e+03     |\n",
      "|    ep_rew_mean          | 1.85e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 748          |\n",
      "|    time_elapsed         | 26884        |\n",
      "|    total_timesteps      | 1531904      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010676952 |\n",
      "|    clip_fraction        | 0.00605      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.359       |\n",
      "|    explained_variance   | 0.655        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 349          |\n",
      "|    n_updates            | 7470         |\n",
      "|    policy_gradient_loss | -0.000645    |\n",
      "|    value_loss           | 476          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.06e+03     |\n",
      "|    ep_rew_mean          | 1.86e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 749          |\n",
      "|    time_elapsed         | 26922        |\n",
      "|    total_timesteps      | 1533952      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012627387 |\n",
      "|    clip_fraction        | 0.00825      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.406       |\n",
      "|    explained_variance   | 0.775        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 249          |\n",
      "|    n_updates            | 7480         |\n",
      "|    policy_gradient_loss | -0.0019      |\n",
      "|    value_loss           | 491          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.06e+03     |\n",
      "|    ep_rew_mean          | 1.86e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 750          |\n",
      "|    time_elapsed         | 26959        |\n",
      "|    total_timesteps      | 1536000      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008017189 |\n",
      "|    clip_fraction        | 0.00112      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.364       |\n",
      "|    explained_variance   | 0.757        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 94.3         |\n",
      "|    n_updates            | 7490         |\n",
      "|    policy_gradient_loss | -0.000166    |\n",
      "|    value_loss           | 459          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.06e+03     |\n",
      "|    ep_rew_mean          | 1.87e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 751          |\n",
      "|    time_elapsed         | 26997        |\n",
      "|    total_timesteps      | 1538048      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0016196688 |\n",
      "|    clip_fraction        | 0.0244       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.287       |\n",
      "|    explained_variance   | 0.791        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 171          |\n",
      "|    n_updates            | 7500         |\n",
      "|    policy_gradient_loss | -0.00144     |\n",
      "|    value_loss           | 454          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.07e+03     |\n",
      "|    ep_rew_mean          | 1.87e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 752          |\n",
      "|    time_elapsed         | 27036        |\n",
      "|    total_timesteps      | 1540096      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008655205 |\n",
      "|    clip_fraction        | 0.0147       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.327       |\n",
      "|    explained_variance   | 0.788        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 310          |\n",
      "|    n_updates            | 7510         |\n",
      "|    policy_gradient_loss | -0.00159     |\n",
      "|    value_loss           | 544          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.08e+03     |\n",
      "|    ep_rew_mean          | 1.88e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 753          |\n",
      "|    time_elapsed         | 27074        |\n",
      "|    total_timesteps      | 1542144      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011978945 |\n",
      "|    clip_fraction        | 0.0195       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.377       |\n",
      "|    explained_variance   | 0.728        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 217          |\n",
      "|    n_updates            | 7520         |\n",
      "|    policy_gradient_loss | -0.00137     |\n",
      "|    value_loss           | 667          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.09e+03     |\n",
      "|    ep_rew_mean          | 1.9e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 754          |\n",
      "|    time_elapsed         | 27113        |\n",
      "|    total_timesteps      | 1544192      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007205962 |\n",
      "|    clip_fraction        | 0.00864      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.411       |\n",
      "|    explained_variance   | 0.782        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 99.5         |\n",
      "|    n_updates            | 7530         |\n",
      "|    policy_gradient_loss | -0.000826    |\n",
      "|    value_loss           | 459          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.09e+03      |\n",
      "|    ep_rew_mean          | 1.89e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 755           |\n",
      "|    time_elapsed         | 27151         |\n",
      "|    total_timesteps      | 1546240       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00090896897 |\n",
      "|    clip_fraction        | 0.00103       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.377        |\n",
      "|    explained_variance   | 0.892         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 84.3          |\n",
      "|    n_updates            | 7540          |\n",
      "|    policy_gradient_loss | -0.000617     |\n",
      "|    value_loss           | 321           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 756          |\n",
      "|    time_elapsed         | 27188        |\n",
      "|    total_timesteps      | 1548288      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0009022778 |\n",
      "|    clip_fraction        | 9.77e-05     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.401       |\n",
      "|    explained_variance   | 0.818        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 187          |\n",
      "|    n_updates            | 7550         |\n",
      "|    policy_gradient_loss | -3.75e-05    |\n",
      "|    value_loss           | 403          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.92e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 757           |\n",
      "|    time_elapsed         | 27224         |\n",
      "|    total_timesteps      | 1550336       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00092502125 |\n",
      "|    clip_fraction        | 0.00654       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.416        |\n",
      "|    explained_variance   | 0.783         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 423           |\n",
      "|    n_updates            | 7560          |\n",
      "|    policy_gradient_loss | -0.00128      |\n",
      "|    value_loss           | 553           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 758          |\n",
      "|    time_elapsed         | 27260        |\n",
      "|    total_timesteps      | 1552384      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006952807 |\n",
      "|    clip_fraction        | 0.00308      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.387       |\n",
      "|    explained_variance   | 0.848        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 81.8         |\n",
      "|    n_updates            | 7570         |\n",
      "|    policy_gradient_loss | -0.000661    |\n",
      "|    value_loss           | 316          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 759          |\n",
      "|    time_elapsed         | 27296        |\n",
      "|    total_timesteps      | 1554432      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008533776 |\n",
      "|    clip_fraction        | 0.00542      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.476       |\n",
      "|    explained_variance   | 0.775        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 199          |\n",
      "|    n_updates            | 7580         |\n",
      "|    policy_gradient_loss | 0.000218     |\n",
      "|    value_loss           | 619          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 760          |\n",
      "|    time_elapsed         | 27332        |\n",
      "|    total_timesteps      | 1556480      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0009902939 |\n",
      "|    clip_fraction        | 0.00132      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.438       |\n",
      "|    explained_variance   | 0.857        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 104          |\n",
      "|    n_updates            | 7590         |\n",
      "|    policy_gradient_loss | -0.00038     |\n",
      "|    value_loss           | 390          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 761          |\n",
      "|    time_elapsed         | 27368        |\n",
      "|    total_timesteps      | 1558528      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005508821 |\n",
      "|    clip_fraction        | 0.00303      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.416       |\n",
      "|    explained_variance   | 0.72         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 58.3         |\n",
      "|    n_updates            | 7600         |\n",
      "|    policy_gradient_loss | -0.000798    |\n",
      "|    value_loss           | 535          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 762          |\n",
      "|    time_elapsed         | 27405        |\n",
      "|    total_timesteps      | 1560576      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008996425 |\n",
      "|    clip_fraction        | 0.00728      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.451       |\n",
      "|    explained_variance   | 0.798        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 69           |\n",
      "|    n_updates            | 7610         |\n",
      "|    policy_gradient_loss | -0.000321    |\n",
      "|    value_loss           | 376          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.11e+03      |\n",
      "|    ep_rew_mean          | 1.89e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 763           |\n",
      "|    time_elapsed         | 27439         |\n",
      "|    total_timesteps      | 1562624       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00039421354 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.448        |\n",
      "|    explained_variance   | 0.784         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 366           |\n",
      "|    n_updates            | 7620          |\n",
      "|    policy_gradient_loss | -0.000116     |\n",
      "|    value_loss           | 473           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.1e+03      |\n",
      "|    ep_rew_mean          | 1.89e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 764          |\n",
      "|    time_elapsed         | 27475        |\n",
      "|    total_timesteps      | 1564672      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0025021743 |\n",
      "|    clip_fraction        | 0.0133       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.416       |\n",
      "|    explained_variance   | 0.843        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 157          |\n",
      "|    n_updates            | 7630         |\n",
      "|    policy_gradient_loss | -0.000236    |\n",
      "|    value_loss           | 440          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.11e+03     |\n",
      "|    ep_rew_mean          | 1.89e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 765          |\n",
      "|    time_elapsed         | 27510        |\n",
      "|    total_timesteps      | 1566720      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010459349 |\n",
      "|    clip_fraction        | 0.0169       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.416       |\n",
      "|    explained_variance   | 0.651        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 228          |\n",
      "|    n_updates            | 7640         |\n",
      "|    policy_gradient_loss | 0.000193     |\n",
      "|    value_loss           | 648          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.12e+03     |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 766          |\n",
      "|    time_elapsed         | 27549        |\n",
      "|    total_timesteps      | 1568768      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005422456 |\n",
      "|    clip_fraction        | 0.00298      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.395       |\n",
      "|    explained_variance   | 0.748        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 132          |\n",
      "|    n_updates            | 7650         |\n",
      "|    policy_gradient_loss | -0.000303    |\n",
      "|    value_loss           | 324          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.94e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 767          |\n",
      "|    time_elapsed         | 27585        |\n",
      "|    total_timesteps      | 1570816      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0020345133 |\n",
      "|    clip_fraction        | 0.00835      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.476       |\n",
      "|    explained_variance   | 0.799        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 181          |\n",
      "|    n_updates            | 7660         |\n",
      "|    policy_gradient_loss | 0.000325     |\n",
      "|    value_loss           | 410          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.14e+03     |\n",
      "|    ep_rew_mean          | 1.95e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 768          |\n",
      "|    time_elapsed         | 27623        |\n",
      "|    total_timesteps      | 1572864      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0013340312 |\n",
      "|    clip_fraction        | 0.00806      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.417       |\n",
      "|    explained_variance   | 0.844        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 174          |\n",
      "|    n_updates            | 7670         |\n",
      "|    policy_gradient_loss | -0.000769    |\n",
      "|    value_loss           | 341          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 1.97e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 769          |\n",
      "|    time_elapsed         | 27665        |\n",
      "|    total_timesteps      | 1574912      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008415313 |\n",
      "|    clip_fraction        | 0.00503      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.422       |\n",
      "|    explained_variance   | 0.847        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 261          |\n",
      "|    n_updates            | 7680         |\n",
      "|    policy_gradient_loss | -0.00184     |\n",
      "|    value_loss           | 287          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.15e+03     |\n",
      "|    ep_rew_mean          | 1.96e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 770          |\n",
      "|    time_elapsed         | 27703        |\n",
      "|    total_timesteps      | 1576960      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007484321 |\n",
      "|    clip_fraction        | 0.0083       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.499       |\n",
      "|    explained_variance   | 0.885        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 145          |\n",
      "|    n_updates            | 7690         |\n",
      "|    policy_gradient_loss | -0.00117     |\n",
      "|    value_loss           | 334          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.15e+03     |\n",
      "|    ep_rew_mean          | 1.94e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 771          |\n",
      "|    time_elapsed         | 27743        |\n",
      "|    total_timesteps      | 1579008      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0029238912 |\n",
      "|    clip_fraction        | 0.0488       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.4         |\n",
      "|    explained_variance   | 0.761        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 130          |\n",
      "|    n_updates            | 7700         |\n",
      "|    policy_gradient_loss | -0.00323     |\n",
      "|    value_loss           | 540          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.15e+03      |\n",
      "|    ep_rew_mean          | 1.96e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 772           |\n",
      "|    time_elapsed         | 27785         |\n",
      "|    total_timesteps      | 1581056       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00051488495 |\n",
      "|    clip_fraction        | 0.00747       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.396        |\n",
      "|    explained_variance   | 0.768         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 228           |\n",
      "|    n_updates            | 7710          |\n",
      "|    policy_gradient_loss | -0.001        |\n",
      "|    value_loss           | 553           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.15e+03      |\n",
      "|    ep_rew_mean          | 1.96e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 773           |\n",
      "|    time_elapsed         | 27824         |\n",
      "|    total_timesteps      | 1583104       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00037848306 |\n",
      "|    clip_fraction        | 0.00391       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.406        |\n",
      "|    explained_variance   | 0.627         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 386           |\n",
      "|    n_updates            | 7720          |\n",
      "|    policy_gradient_loss | -0.000187     |\n",
      "|    value_loss           | 569           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 1.97e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 774          |\n",
      "|    time_elapsed         | 27865        |\n",
      "|    total_timesteps      | 1585152      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011767687 |\n",
      "|    clip_fraction        | 0.000928     |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.418       |\n",
      "|    explained_variance   | 0.847        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 108          |\n",
      "|    n_updates            | 7730         |\n",
      "|    policy_gradient_loss | -0.000687    |\n",
      "|    value_loss           | 327          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.15e+03     |\n",
      "|    ep_rew_mean          | 1.96e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 775          |\n",
      "|    time_elapsed         | 27906        |\n",
      "|    total_timesteps      | 1587200      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005047559 |\n",
      "|    clip_fraction        | 0.00103      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.369       |\n",
      "|    explained_variance   | 0.72         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 173          |\n",
      "|    n_updates            | 7740         |\n",
      "|    policy_gradient_loss | -0.000632    |\n",
      "|    value_loss           | 486          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 1.98e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 776          |\n",
      "|    time_elapsed         | 27946        |\n",
      "|    total_timesteps      | 1589248      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0016400728 |\n",
      "|    clip_fraction        | 0.0186       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.324       |\n",
      "|    explained_variance   | 0.836        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 150          |\n",
      "|    n_updates            | 7750         |\n",
      "|    policy_gradient_loss | -0.0015      |\n",
      "|    value_loss           | 287          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 1.98e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 777          |\n",
      "|    time_elapsed         | 27987        |\n",
      "|    total_timesteps      | 1591296      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014233872 |\n",
      "|    clip_fraction        | 0.0105       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.273       |\n",
      "|    explained_variance   | 0.668        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 315          |\n",
      "|    n_updates            | 7760         |\n",
      "|    policy_gradient_loss | -0.00114     |\n",
      "|    value_loss           | 690          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 1.98e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 778          |\n",
      "|    time_elapsed         | 28026        |\n",
      "|    total_timesteps      | 1593344      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0002659287 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.245       |\n",
      "|    explained_variance   | 0.827        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 64.4         |\n",
      "|    n_updates            | 7770         |\n",
      "|    policy_gradient_loss | -3.08e-06    |\n",
      "|    value_loss           | 322          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.15e+03    |\n",
      "|    ep_rew_mean          | 1.97e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 56          |\n",
      "|    iterations           | 779         |\n",
      "|    time_elapsed         | 28068       |\n",
      "|    total_timesteps      | 1595392     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.000532993 |\n",
      "|    clip_fraction        | 0.00127     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.318      |\n",
      "|    explained_variance   | 0.772       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 308         |\n",
      "|    n_updates            | 7780        |\n",
      "|    policy_gradient_loss | -0.00057    |\n",
      "|    value_loss           | 538         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 1.99e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 780          |\n",
      "|    time_elapsed         | 28111        |\n",
      "|    total_timesteps      | 1597440      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012515232 |\n",
      "|    clip_fraction        | 0.0264       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.339       |\n",
      "|    explained_variance   | 0.826        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 98.1         |\n",
      "|    n_updates            | 7790         |\n",
      "|    policy_gradient_loss | -0.00222     |\n",
      "|    value_loss           | 430          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 2e+03        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 781          |\n",
      "|    time_elapsed         | 28153        |\n",
      "|    total_timesteps      | 1599488      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006254186 |\n",
      "|    clip_fraction        | 0.00244      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.384       |\n",
      "|    explained_variance   | 0.823        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 335          |\n",
      "|    n_updates            | 7800         |\n",
      "|    policy_gradient_loss | -0.00054     |\n",
      "|    value_loss           | 438          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.17e+03      |\n",
      "|    ep_rew_mean          | 1.99e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 782           |\n",
      "|    time_elapsed         | 28199         |\n",
      "|    total_timesteps      | 1601536       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00030210213 |\n",
      "|    clip_fraction        | 0.000488      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.379        |\n",
      "|    explained_variance   | 0.729         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 239           |\n",
      "|    n_updates            | 7810          |\n",
      "|    policy_gradient_loss | -0.000374     |\n",
      "|    value_loss           | 599           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.16e+03      |\n",
      "|    ep_rew_mean          | 1.99e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 783           |\n",
      "|    time_elapsed         | 28240         |\n",
      "|    total_timesteps      | 1603584       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00023703472 |\n",
      "|    clip_fraction        | 0.000537      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.37         |\n",
      "|    explained_variance   | 0.861         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 126           |\n",
      "|    n_updates            | 7820          |\n",
      "|    policy_gradient_loss | -0.000459     |\n",
      "|    value_loss           | 333           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.16e+03     |\n",
      "|    ep_rew_mean          | 1.99e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 784          |\n",
      "|    time_elapsed         | 28279        |\n",
      "|    total_timesteps      | 1605632      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0018659005 |\n",
      "|    clip_fraction        | 0.0257       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.342       |\n",
      "|    explained_variance   | 0.642        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 202          |\n",
      "|    n_updates            | 7830         |\n",
      "|    policy_gradient_loss | -0.00268     |\n",
      "|    value_loss           | 660          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.16e+03    |\n",
      "|    ep_rew_mean          | 1.99e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 56          |\n",
      "|    iterations           | 785         |\n",
      "|    time_elapsed         | 28316       |\n",
      "|    total_timesteps      | 1607680     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.000678154 |\n",
      "|    clip_fraction        | 0.00396     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.345      |\n",
      "|    explained_variance   | 0.785       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 211         |\n",
      "|    n_updates            | 7840        |\n",
      "|    policy_gradient_loss | 0.000191    |\n",
      "|    value_loss           | 520         |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.16e+03      |\n",
      "|    ep_rew_mean          | 1.99e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 786           |\n",
      "|    time_elapsed         | 28353         |\n",
      "|    total_timesteps      | 1609728       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00022416818 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.346        |\n",
      "|    explained_variance   | 0.856         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 94.2          |\n",
      "|    n_updates            | 7850          |\n",
      "|    policy_gradient_loss | -0.000385     |\n",
      "|    value_loss           | 316           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.16e+03      |\n",
      "|    ep_rew_mean          | 1.99e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 787           |\n",
      "|    time_elapsed         | 28392         |\n",
      "|    total_timesteps      | 1611776       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00026985042 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.375        |\n",
      "|    explained_variance   | 0.793         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 170           |\n",
      "|    n_updates            | 7860          |\n",
      "|    policy_gradient_loss | -0.000155     |\n",
      "|    value_loss           | 482           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.15e+03      |\n",
      "|    ep_rew_mean          | 1.98e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 788           |\n",
      "|    time_elapsed         | 28432         |\n",
      "|    total_timesteps      | 1613824       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00031318766 |\n",
      "|    clip_fraction        | 0.00166       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.379        |\n",
      "|    explained_variance   | 0.791         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 129           |\n",
      "|    n_updates            | 7870          |\n",
      "|    policy_gradient_loss | -0.000247     |\n",
      "|    value_loss           | 481           |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.96e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 789          |\n",
      "|    time_elapsed         | 28471        |\n",
      "|    total_timesteps      | 1615872      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010034062 |\n",
      "|    clip_fraction        | 0.00161      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.382       |\n",
      "|    explained_variance   | 0.858        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 246          |\n",
      "|    n_updates            | 7880         |\n",
      "|    policy_gradient_loss | -0.000619    |\n",
      "|    value_loss           | 391          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.95e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 790          |\n",
      "|    time_elapsed         | 28512        |\n",
      "|    total_timesteps      | 1617920      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010838478 |\n",
      "|    clip_fraction        | 0.0108       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.358       |\n",
      "|    explained_variance   | 0.78         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 141          |\n",
      "|    n_updates            | 7890         |\n",
      "|    policy_gradient_loss | -0.00133     |\n",
      "|    value_loss           | 484          |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 1.13e+03      |\n",
      "|    ep_rew_mean          | 1.95e+03      |\n",
      "| time/                   |               |\n",
      "|    fps                  | 56            |\n",
      "|    iterations           | 791           |\n",
      "|    time_elapsed         | 28552         |\n",
      "|    total_timesteps      | 1619968       |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00012250186 |\n",
      "|    clip_fraction        | 4.88e-05      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -0.363        |\n",
      "|    explained_variance   | 0.846         |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 105           |\n",
      "|    n_updates            | 7900          |\n",
      "|    policy_gradient_loss | -0.000184     |\n",
      "|    value_loss           | 458           |\n",
      "-------------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 1.13e+03  |\n",
      "|    ep_rew_mean          | 1.95e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 56        |\n",
      "|    iterations           | 792       |\n",
      "|    time_elapsed         | 28597     |\n",
      "|    total_timesteps      | 1622016   |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.0028822 |\n",
      "|    clip_fraction        | 0.0277    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.679    |\n",
      "|    explained_variance   | 0.865     |\n",
      "|    learning_rate        | 1e-06     |\n",
      "|    loss                 | 328       |\n",
      "|    n_updates            | 7910      |\n",
      "|    policy_gradient_loss | -0.00186  |\n",
      "|    value_loss           | 601       |\n",
      "---------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.13e+03    |\n",
      "|    ep_rew_mean          | 1.95e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 56          |\n",
      "|    iterations           | 793         |\n",
      "|    time_elapsed         | 28637       |\n",
      "|    total_timesteps      | 1624064     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011874962 |\n",
      "|    clip_fraction        | 0.186       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.6        |\n",
      "|    explained_variance   | 0.735       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 80.6        |\n",
      "|    n_updates            | 7920        |\n",
      "|    policy_gradient_loss | -0.00279    |\n",
      "|    value_loss           | 175         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.13e+03    |\n",
      "|    ep_rew_mean          | 1.95e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 56          |\n",
      "|    iterations           | 794         |\n",
      "|    time_elapsed         | 28678       |\n",
      "|    total_timesteps      | 1626112     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.014722314 |\n",
      "|    clip_fraction        | 0.0187      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.52       |\n",
      "|    explained_variance   | -1.36       |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.227       |\n",
      "|    n_updates            | 7930        |\n",
      "|    policy_gradient_loss | -0.00396    |\n",
      "|    value_loss           | 1.12        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 1.13e+03    |\n",
      "|    ep_rew_mean          | 1.95e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 56          |\n",
      "|    iterations           | 795         |\n",
      "|    time_elapsed         | 28718       |\n",
      "|    total_timesteps      | 1628160     |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.002398559 |\n",
      "|    clip_fraction        | 0           |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.45       |\n",
      "|    explained_variance   | -0.984      |\n",
      "|    learning_rate        | 1e-06       |\n",
      "|    loss                 | 0.234       |\n",
      "|    n_updates            | 7940        |\n",
      "|    policy_gradient_loss | -0.00117    |\n",
      "|    value_loss           | 0.762       |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.95e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 796          |\n",
      "|    time_elapsed         | 28760        |\n",
      "|    total_timesteps      | 1630208      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0025539808 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.47        |\n",
      "|    explained_variance   | -0.282       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.218        |\n",
      "|    n_updates            | 7950         |\n",
      "|    policy_gradient_loss | -0.00244     |\n",
      "|    value_loss           | 0.739        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.95e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 797          |\n",
      "|    time_elapsed         | 28801        |\n",
      "|    total_timesteps      | 1632256      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0043520755 |\n",
      "|    clip_fraction        | 0.0146       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.41        |\n",
      "|    explained_variance   | 0.758        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 32.1         |\n",
      "|    n_updates            | 7960         |\n",
      "|    policy_gradient_loss | -0.00237     |\n",
      "|    value_loss           | 109          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.95e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 798          |\n",
      "|    time_elapsed         | 28840        |\n",
      "|    total_timesteps      | 1634304      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0028622553 |\n",
      "|    clip_fraction        | 0.0112       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.49        |\n",
      "|    explained_variance   | -1.86        |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.478        |\n",
      "|    n_updates            | 7970         |\n",
      "|    policy_gradient_loss | -0.000103    |\n",
      "|    value_loss           | 11           |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 1.13e+03     |\n",
      "|    ep_rew_mean          | 1.95e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 56           |\n",
      "|    iterations           | 799          |\n",
      "|    time_elapsed         | 28878        |\n",
      "|    total_timesteps      | 1636352      |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006116319 |\n",
      "|    clip_fraction        | 0.0146       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.54        |\n",
      "|    explained_variance   | -0.392       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 1.11         |\n",
      "|    n_updates            | 7980         |\n",
      "|    policy_gradient_loss | -0.00117     |\n",
      "|    value_loss           | 7.33         |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32mC:\\Users\\RENYIF~1\\AppData\\Local\\Temp/ipykernel_624328/1371844059.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m     12\u001b[0m \u001b[0msave_path\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34mr\"F:\\\\RL_Mario\\\\\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     13\u001b[0m \u001b[0mcallback1\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mSaveOnStepCallback\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcheck_freq\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m20000\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0msave_path\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0msave_path\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 14\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlearn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtotal_timesteps\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m5000000\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mcallback\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcallback1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32mD:\\software\\e_anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py\u001b[0m in \u001b[0;36mlearn\u001b[1;34m(self, total_timesteps, callback, log_interval, eval_env, eval_freq, n_eval_episodes, tb_log_name, eval_log_path, reset_num_timesteps)\u001b[0m\n\u001b[0;32m    297\u001b[0m     ) -> \"PPO\":\n\u001b[0;32m    298\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 299\u001b[1;33m         return super(PPO, self).learn(\n\u001b[0m\u001b[0;32m    300\u001b[0m             \u001b[0mtotal_timesteps\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtotal_timesteps\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    301\u001b[0m             \u001b[0mcallback\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcallback\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\software\\e_anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\common\\on_policy_algorithm.py\u001b[0m in \u001b[0;36mlearn\u001b[1;34m(self, total_timesteps, callback, log_interval, eval_env, eval_freq, n_eval_episodes, tb_log_name, eval_log_path, reset_num_timesteps)\u001b[0m\n\u001b[0;32m    248\u001b[0m         \u001b[1;32mwhile\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnum_timesteps\u001b[0m \u001b[1;33m<\u001b[0m \u001b[0mtotal_timesteps\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    249\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 250\u001b[1;33m             \u001b[0mcontinue_training\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcollect_rollouts\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrollout_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mn_rollout_steps\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mn_steps\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    251\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    252\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mcontinue_training\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\software\\e_anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\common\\on_policy_algorithm.py\u001b[0m in \u001b[0;36mcollect_rollouts\u001b[1;34m(self, env, callback, rollout_buffer, n_rollout_steps)\u001b[0m\n\u001b[0;32m    176\u001b[0m                 \u001b[0mclipped_actions\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclip\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0maction_space\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlow\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0maction_space\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhigh\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    177\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 178\u001b[1;33m             \u001b[0mnew_obs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrewards\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdones\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfos\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mclipped_actions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    179\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    180\u001b[0m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnum_timesteps\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnum_envs\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\software\\e_anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\common\\vec_env\\base_vec_env.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self, actions)\u001b[0m\n\u001b[0;32m    160\u001b[0m         \"\"\"\n\u001b[0;32m    161\u001b[0m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep_async\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 162\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep_wait\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    163\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    164\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mget_images\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m->\u001b[0m \u001b[0mSequence\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\software\\e_anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\common\\vec_env\\vec_transpose.py\u001b[0m in \u001b[0;36mstep_wait\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m     93\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     94\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mstep_wait\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m->\u001b[0m \u001b[0mVecEnvStepReturn\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 95\u001b[1;33m         \u001b[0mobservations\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrewards\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdones\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfos\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvenv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep_wait\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     96\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     97\u001b[0m         \u001b[1;31m# Transpose the terminal observations\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\software\\e_anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\common\\vec_env\\vec_frame_stack.py\u001b[0m in \u001b[0;36mstep_wait\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m     46\u001b[0m     ) -> Tuple[Union[np.ndarray, Dict[str, np.ndarray]], np.ndarray, np.ndarray, List[Dict[str, Any]],]:\n\u001b[0;32m     47\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 48\u001b[1;33m         \u001b[0mobservations\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrewards\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdones\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfos\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvenv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep_wait\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     49\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     50\u001b[0m         \u001b[0mobservations\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfos\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstackedobs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobservations\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdones\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\software\\e_anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\common\\vec_env\\dummy_vec_env.py\u001b[0m in \u001b[0;36mstep_wait\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m     41\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mstep_wait\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m->\u001b[0m \u001b[0mVecEnvStepReturn\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     42\u001b[0m         \u001b[1;32mfor\u001b[0m \u001b[0menv_idx\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnum_envs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 43\u001b[1;33m             obs, self.buf_rews[env_idx], self.buf_dones[env_idx], self.buf_infos[env_idx] = self.envs[env_idx].step(\n\u001b[0m\u001b[0;32m     44\u001b[0m                 \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0menv_idx\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     45\u001b[0m             )\n",
      "\u001b[1;32mD:\\software\\e_anaconda\\envs\\pytorch\\lib\\site-packages\\gym\\core.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self, action)\u001b[0m\n\u001b[0;32m    280\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    281\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maction\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 282\u001b[1;33m         \u001b[0mobservation\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfo\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maction\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    283\u001b[0m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mobservation\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobservation\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfo\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    284\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\software\\e_anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\common\\monitor.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self, action)\u001b[0m\n\u001b[0;32m     88\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mneeds_reset\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     89\u001b[0m             \u001b[1;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Tried to step environment that needs reset\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 90\u001b[1;33m         \u001b[0mobservation\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfo\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maction\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     91\u001b[0m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrewards\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mreward\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     92\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\software\\e_anaconda\\envs\\pytorch\\lib\\site-packages\\nes_py\\wrappers\\joypad_space.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self, action)\u001b[0m\n\u001b[0;32m     72\u001b[0m         \"\"\"\n\u001b[0;32m     73\u001b[0m         \u001b[1;31m# take the step and record the output\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 74\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_action_map\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0maction\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     75\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     76\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mreset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\software\\e_anaconda\\envs\\pytorch\\lib\\site-packages\\gym\\wrappers\\time_limit.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self, action)\u001b[0m\n\u001b[0;32m     16\u001b[0m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_elapsed_steps\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     17\u001b[0m         ), \"Cannot call env.step() before calling reset()\"\n\u001b[1;32m---> 18\u001b[1;33m         \u001b[0mobservation\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfo\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maction\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     19\u001b[0m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_elapsed_steps\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     20\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_elapsed_steps\u001b[0m \u001b[1;33m>=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_max_episode_steps\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\software\\e_anaconda\\envs\\pytorch\\lib\\site-packages\\nes_py\\nes_env.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self, action)\u001b[0m\n\u001b[0;32m    291\u001b[0m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcontrollers\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0maction\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    292\u001b[0m         \u001b[1;31m# pass the action to the emulator as an unsigned byte\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 293\u001b[1;33m         \u001b[0m_LIB\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mStep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_env\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    294\u001b[0m         \u001b[1;31m# get the reward for this step\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    295\u001b[0m         \u001b[0mreward\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_get_reward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "# PPO hyperparameters; n_steps is the number of environment steps gathered per rollout.\n",
    "learning_rate = 1e-6\n",
    "n_steps = 2048\n",
    "\n",
    "# Directory passed to PPO as its TensorBoard log location.\n",
    "tensorboard_log = r'./tensorboard_logs/'\n",
    "model = PPO(\"CnnPolicy\", env, verbose=1,\n",
    "            learning_rate=learning_rate, n_steps=n_steps,\n",
    "            tensorboard_log=tensorboard_log)\n",
    "\n",
    "# NOTE: machine-specific Windows directory; change it when running elsewhere.\n",
    "# Written as a normal (non-raw) literal so each escaped pair is a single path\n",
    "# separator; combining an r-prefix with escaped backslashes doubled every separator.\n",
    "save_path = \"F:\\\\RL_Mario\\\\\"\n",
    "\n",
    "# The callback runs its check every `check_freq` steps during training.\n",
    "callback1 = SaveOnStepCallback(check_freq=20_000, save_path=save_path)\n",
    "\n",
    "# Long-running: the recorded run was stopped by hand well before 5M timesteps.\n",
    "model.learn(total_timesteps=5_000_000, callback=callback1)"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "9465aae7e0ab1403d672807d1a0963d86dbda2f584fbe3054c36cf78311c6c77"
  },
  "kernelspec": {
   "display_name": "Python 3.8.11 ('pytorch')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.11"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
